As suggested by Terry, I've cooked up a patch which stops using mbufs to store TCP template structures. The structure was only used in two places: tcp_output.c when sending packets, and tcp_timer.c when sending keepalives. tcp_output now pulls the info directly from the tcpcb, while tcp_timer creates a short-term TCP template that is destroyed after use.

The end result is that rather than 1 mbuf being the minimum used per connection, 0 mbufs is now the minimum. As a result, those with boxes handling a lot of connections should see greatly reduced mbuf usage.

I've attached two patches: one for -current, and one for -stable. Please review / test, _especially_ if you're using IPv6 or IPsec - while those cases look correct, I'm not running either and haven't tested them.

Thanks,

Mike "Silby" Silbersack
Only in netinet.old/: icmp_var.h.orig Only in netinet.old/: ip_icmp.c.orig diff -u -r netinet.old/tcp_input.c netinet/tcp_input.c --- netinet.old/tcp_input.c Tue Jun 19 11:53:16 2001 +++ netinet/tcp_input.c Tue Jun 19 11:53:25 2001 @@ -1066,12 +1066,7 @@ } FREE(sin, M_SONAME); } - tp->t_template = tcp_template(tp); - if (tp->t_template == 0) { - tp = tcp_drop(tp, ENOBUFS); - dropsocket = 0; /* socket is already gone */ - goto drop; - } + tp->t_template = NULL; if ((taop = tcp_gettaocache(inp)) == NULL) { taop = &tao_noncached; bzero(taop, sizeof(*taop)); Only in netinet.old/: tcp_input.c.orig Only in netinet.old/: tcp_input.c.rej diff -u -r netinet.old/tcp_output.c netinet/tcp_output.c --- netinet.old/tcp_output.c Tue Jun 19 11:53:16 2001 +++ netinet/tcp_output.c Tue Jun 19 11:53:25 2001 @@ -630,16 +630,12 @@ m->m_len = hdrlen; } m->m_pkthdr.rcvif = (struct ifnet *)0; - if (tp->t_template == 0) - panic("tcp_output"); + #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, - sizeof(struct ip6_hdr)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip6, th); } else #endif /* INET6 */ { @@ -647,10 +643,7 @@ ipov = (struct ipovly *)ip; th = (struct tcphdr *)(ip + 1); /* this picks up the pseudo header (w/o the length) */ - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, - sizeof(struct ip)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip, th); } /* Only in netinet.old/: tcp_output.c.orig Only in netinet.old/: tcp_seq.h.orig diff -u -r netinet.old/tcp_subr.c netinet/tcp_subr.c --- netinet.old/tcp_subr.c Tue Jun 19 11:53:16 2001 +++ netinet/tcp_subr.c Tue Jun 19 11:57:56 2001 @@ -220,32 +220,27 @@ #undef TCP_MINPROTOHDR } + /* - * Create template to be used to send tcp packets on a connection. 
- * Call after host entry created, allocates an mbuf and fills - * in a skeletal tcp/ip header, minimizing the amount of work - * necessary when the connection is used. + * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. + * tcp_template used to store this data in mbufs, but we now recopy it out + * of the tcpcb each time to conserve mbufs. */ -struct tcptemp * -tcp_template(tp) + +void +tcp_fillheaders(tp, ip_ptr, tcp_ptr) struct tcpcb *tp; + void *ip_ptr; + void *tcp_ptr; { - register struct inpcb *inp = tp->t_inpcb; - register struct mbuf *m; - register struct tcptemp *n; + struct inpcb *inp = tp->t_inpcb; + struct tcphdr *tcp_hdr = (struct tcphdr *)tcp_ptr; - if ((n = tp->t_template) == 0) { - m = m_get(M_DONTWAIT, MT_HEADER); - if (m == NULL) - return (0); - m->m_len = sizeof (struct tcptemp); - n = mtod(m, struct tcptemp *); - } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { - register struct ip6_hdr *ip6; + struct ip6_hdr *ip6; - ip6 = (struct ip6_hdr *)n->tt_ipgen; + ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | @@ -254,29 +249,52 @@ ip6->ip6_plen = sizeof(struct tcphdr); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; - n->tt_t.th_sum = 0; + tcp_hdr->th_sum = 0; } else #endif - { - struct ip *ip = (struct ip *)n->tt_ipgen; + { + struct ip *ip = (struct ip *) ip_ptr; - bzero(ip, sizeof(struct ip)); /* XXX overkill? */ + bzero(ip, sizeof(struct ip)); /* XXX overkill? 
*/ ip->ip_vhl = IP_VHL_BORING; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; - n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(sizeof(struct tcphdr) + IPPROTO_TCP)); - } - n->tt_t.th_sport = inp->inp_lport; - n->tt_t.th_dport = inp->inp_fport; - n->tt_t.th_seq = 0; - n->tt_t.th_ack = 0; - n->tt_t.th_x2 = 0; - n->tt_t.th_off = 5; - n->tt_t.th_flags = 0; - n->tt_t.th_win = 0; - n->tt_t.th_urp = 0; + tcp_hdr->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + IPPROTO_TCP)); + } + + tcp_hdr->th_sport = inp->inp_lport; + tcp_hdr->th_dport = inp->inp_fport; + tcp_hdr->th_seq = 0; + tcp_hdr->th_ack = 0; + tcp_hdr->th_x2 = 0; + tcp_hdr->th_off = 5; + tcp_hdr->th_flags = 0; + tcp_hdr->th_win = 0; + tcp_hdr->th_urp = 0; +} + + +/* + * Create template to be used to send tcp packets on a connection. + * Allocates an mbuf and fills in a skeletal tcp/ip header. The only + * use for this function is in keepalives, who like to use tcp_respond. 
+ */ +struct tcptemp * +tcp_maketemplate(tp) + struct tcpcb *tp; +{ + struct mbuf *m; + struct tcptemp *n; + + m = m_get(M_DONTWAIT, MT_HEADER); + if (m == NULL) + return (0); + m->m_len = sizeof (struct tcptemp); + n = mtod(m, struct tcptemp *); + + tcp_fillheaders(tp, (void *)&n->tt_ipgen, (void *)&n->tt_t); return (n); } @@ -706,7 +724,7 @@ FREE(q, M_TSEGQ); } if (tp->t_template) - (void) m_free(dtom(tp->t_template)); + panic("t_template non-null!"); inp->inp_ppcb = NULL; soisdisconnected(so); #ifdef INET6 @@ -1347,7 +1365,7 @@ #endif /* INET6 */ struct tcphdr *th; - if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) + if (!tp || !(inp = tp->t_inpcb)) return 0; MGETHDR(m, M_DONTWAIT, MT_DATA); if (!m) @@ -1359,10 +1377,7 @@ th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, - sizeof(struct ip6_hdr)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip6, th); hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ @@ -1370,10 +1385,7 @@ ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, - sizeof(struct ip)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip, th); hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } Only in netinet.old/: tcp_subr.c.orig diff -u -r netinet.old/tcp_timer.c netinet/tcp_timer.c --- netinet.old/tcp_timer.c Tue Jun 19 11:53:16 2001 +++ netinet/tcp_timer.c Tue Jun 19 11:56:13 2001 @@ -41,6 +41,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> +#include <sys/mbuf.h> #include <sys/sysctl.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -227,6 +228,7 @@ void *xtp; { struct tcpcb *tp = xtp; + struct tcptemp *t_template; int s; #ifdef TCPDEBUG int 
ostate; @@ -273,9 +275,14 @@ &tp->t_template->tt_t, (struct mbuf *)NULL, tp->rcv_nxt - 1, tp->snd_una - 1, 0); #else - tcp_respond(tp, tp->t_template->tt_ipgen, - &tp->t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una - 1, 0); + + t_template = tcp_maketemplate(tp); + if (t_template) { + tcp_respond(tp, t_template->tt_ipgen, + &t_template->tt_t, (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0); + (void) m_free(dtom(t_template)); + } #endif callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); } else Only in netinet.old/: tcp_timer.c.orig diff -u -r netinet.old/tcp_usrreq.c netinet/tcp_usrreq.c --- netinet.old/tcp_usrreq.c Tue Jun 19 11:53:16 2001 +++ netinet/tcp_usrreq.c Tue Jun 19 11:53:27 2001 @@ -744,11 +744,7 @@ inp->inp_fport = sin->sin_port; in_pcbrehash(inp); - tp->t_template = tcp_template(tp); - if (tp->t_template == 0) { - in_pcbdisconnect(inp); - return ENOBUFS; - } + tp->t_template = NULL; /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && @@ -841,11 +837,7 @@ inp->in6p_flowinfo = sin6->sin6_flowinfo; in_pcbrehash(inp); - tp->t_template = tcp_template(tp); - if (tp->t_template == 0) { - in6_pcbdisconnect(inp); - return ENOBUFS; - } + tp->t_template = NULL; /* Compute window scaling to request. 
*/ while (tp->request_r_scale < TCP_MAX_WINSHIFT && Only in netinet.old/: tcp_usrreq.c.orig Only in netinet.old/: tcp_usrreq.c.rej diff -u -r netinet.old/tcp_var.h netinet/tcp_var.h --- netinet.old/tcp_var.h Tue Jun 19 11:53:16 2001 +++ netinet/tcp_var.h Tue Jun 19 11:53:27 2001 @@ -400,7 +400,8 @@ void tcp_setpersist __P((struct tcpcb *)); void tcp_slowtimo __P((void)); struct tcptemp * - tcp_template __P((struct tcpcb *)); + tcp_maketemplate __P((struct tcpcb *)); +void tcp_fillheaders __P((struct tcpcb *, void *, void *)); struct tcpcb * tcp_timers __P((struct tcpcb *, int)); void tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *, Only in netinet.old/: tcp_var.h.orig Only in netinet.old/: udp_usrreq.c.orig
diff -u -r netinet.old/tcp_input.c netinet/tcp_input.c --- netinet.old/tcp_input.c Mon Jun 18 20:45:07 2001 +++ netinet/tcp_input.c Mon Jun 18 21:04:04 2001 @@ -1127,12 +1127,7 @@ } FREE(sin, M_SONAME); } - tp->t_template = tcp_template(tp); - if (tp->t_template == 0) { - tp = tcp_drop(tp, ENOBUFS); - dropsocket = 0; /* socket is already gone */ - goto drop; - } + tp->t_template = NULL; if ((taop = tcp_gettaocache(inp)) == NULL) { taop = &tao_noncached; bzero(taop, sizeof(*taop)); diff -u -r netinet.old/tcp_output.c netinet/tcp_output.c --- netinet.old/tcp_output.c Mon Jun 18 20:45:07 2001 +++ netinet/tcp_output.c Mon Jun 18 20:59:59 2001 @@ -632,16 +632,12 @@ m->m_len = hdrlen; } m->m_pkthdr.rcvif = (struct ifnet *)0; - if (tp->t_template == 0) - panic("tcp_output"); + #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)(ip6 + 1); - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, - sizeof(struct ip6_hdr)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip6, th); } else #endif /* INET6 */ { @@ -649,10 +645,7 @@ ipov = (struct ipovly *)ip; th = (struct tcphdr *)(ip + 1); /* this picks up the pseudo header (w/o the length) */ - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, - sizeof(struct ip)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip, th); } /* diff -u -r netinet.old/tcp_subr.c netinet/tcp_subr.c --- netinet.old/tcp_subr.c Mon Jun 18 20:45:07 2001 +++ netinet/tcp_subr.c Tue Jun 19 11:47:57 2001 @@ -217,32 +217,27 @@ #undef TCP_MINPROTOHDR } + /* - * Create template to be used to send tcp packets on a connection. - * Call after host entry created, allocates an mbuf and fills - * in a skeletal tcp/ip header, minimizing the amount of work - * necessary when the connection is used. + * Fill in the IP and TCP headers for an outgoing packet, given the tcpcb. 
+ * tcp_template used to store this data in mbufs, but we now recopy it out + * of the tcpcb each time to conserve mbufs. */ -struct tcptemp * -tcp_template(tp) + +void +tcp_fillheaders(tp, ip_ptr, tcp_ptr) struct tcpcb *tp; + void *ip_ptr; + void *tcp_ptr; { - register struct inpcb *inp = tp->t_inpcb; - register struct mbuf *m; - register struct tcptemp *n; + struct inpcb *inp = tp->t_inpcb; + struct tcphdr *tcp_hdr = (struct tcphdr *)tcp_ptr; - if ((n = tp->t_template) == 0) { - m = m_get(M_DONTWAIT, MT_HEADER); - if (m == NULL) - return (0); - m->m_len = sizeof (struct tcptemp); - n = mtod(m, struct tcptemp *); - } #ifdef INET6 if ((inp->inp_vflag & INP_IPV6) != 0) { - register struct ip6_hdr *ip6; + struct ip6_hdr *ip6; - ip6 = (struct ip6_hdr *)n->tt_ipgen; + ip6 = (struct ip6_hdr *)ip_ptr; ip6->ip6_flow = (ip6->ip6_flow & ~IPV6_FLOWINFO_MASK) | (inp->in6p_flowinfo & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | @@ -251,29 +246,52 @@ ip6->ip6_plen = sizeof(struct tcphdr); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; - n->tt_t.th_sum = 0; + tcp_hdr->th_sum = 0; } else #endif - { - struct ip *ip = (struct ip *)n->tt_ipgen; + { + struct ip *ip = (struct ip *) ip_ptr; - bzero(ip, sizeof(struct ip)); /* XXX overkill? */ + bzero(ip, sizeof(struct ip)); /* XXX overkill? 
*/ ip->ip_vhl = IP_VHL_BORING; ip->ip_p = IPPROTO_TCP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; - n->tt_t.th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(sizeof(struct tcphdr) + IPPROTO_TCP)); - } - n->tt_t.th_sport = inp->inp_lport; - n->tt_t.th_dport = inp->inp_fport; - n->tt_t.th_seq = 0; - n->tt_t.th_ack = 0; - n->tt_t.th_x2 = 0; - n->tt_t.th_off = 5; - n->tt_t.th_flags = 0; - n->tt_t.th_win = 0; - n->tt_t.th_urp = 0; + tcp_hdr->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + IPPROTO_TCP)); + } + + tcp_hdr->th_sport = inp->inp_lport; + tcp_hdr->th_dport = inp->inp_fport; + tcp_hdr->th_seq = 0; + tcp_hdr->th_ack = 0; + tcp_hdr->th_x2 = 0; + tcp_hdr->th_off = 5; + tcp_hdr->th_flags = 0; + tcp_hdr->th_win = 0; + tcp_hdr->th_urp = 0; +} + + +/* + * Create template to be used to send tcp packets on a connection. + * Allocates an mbuf and fills in a skeletal tcp/ip header. The only + * use for this function is in keepalives, who like to use tcp_respond. 
+ */ +struct tcptemp * +tcp_maketemplate(tp) + struct tcpcb *tp; +{ + struct mbuf *m; + struct tcptemp *n; + + m = m_get(M_DONTWAIT, MT_HEADER); + if (m == NULL) + return (0); + m->m_len = sizeof (struct tcptemp); + n = mtod(m, struct tcptemp *); + + tcp_fillheaders(tp, (void *)&n->tt_ipgen, (void *)&n->tt_t); return (n); } @@ -702,7 +720,7 @@ FREE(q, M_TSEGQ); } if (tp->t_template) - (void) m_free(dtom(tp->t_template)); + panic("t_template non-null!"); inp->inp_ppcb = NULL; soisdisconnected(so); #ifdef INET6 @@ -1339,7 +1357,7 @@ #endif /* INET6 */ struct tcphdr *th; - if (!tp || !tp->t_template || !(inp = tp->t_inpcb)) + if (!tp || !(inp = tp->t_inpcb)) return 0; MGETHDR(m, M_DONTWAIT, MT_DATA); if (!m) @@ -1351,10 +1369,7 @@ th = (struct tcphdr *)(ip6 + 1); m->m_pkthdr.len = m->m_len = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip6, - sizeof(struct ip6_hdr)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip6, th); hdrsiz = ipsec6_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } else #endif /* INET6 */ @@ -1362,10 +1377,7 @@ ip = mtod(m, struct ip *); th = (struct tcphdr *)(ip + 1); m->m_pkthdr.len = m->m_len = sizeof(struct tcpiphdr); - bcopy((caddr_t)tp->t_template->tt_ipgen, (caddr_t)ip, - sizeof(struct ip)); - bcopy((caddr_t)&tp->t_template->tt_t, (caddr_t)th, - sizeof(struct tcphdr)); + tcp_fillheaders(tp, ip, th); ip->ip_vhl = IP_VHL_BORING; hdrsiz = ipsec4_hdrsiz(m, IPSEC_DIR_OUTBOUND, inp); } Only in netinet.old/: tcp_subr.c.new diff -u -r netinet.old/tcp_timer.c netinet/tcp_timer.c --- netinet.old/tcp_timer.c Mon Jun 18 20:45:07 2001 +++ netinet/tcp_timer.c Tue Jun 19 11:28:26 2001 @@ -41,6 +41,7 @@ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> +#include <sys/mbuf.h> #include <sys/sysctl.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -222,6 +223,7 @@ void *xtp; { struct tcpcb *tp = xtp; + struct tcptemp *t_template; int 
s; #ifdef TCPDEBUG int ostate; @@ -259,9 +261,14 @@ * correspondent TCP to respond. */ tcpstat.tcps_keepprobe++; - tcp_respond(tp, tp->t_template->tt_ipgen, - &tp->t_template->tt_t, (struct mbuf *)NULL, - tp->rcv_nxt, tp->snd_una - 1, 0); + t_template = tcp_maketemplate(tp); + if (t_template) { + tcp_respond(tp, t_template->tt_ipgen, + &t_template->tt_t, (struct mbuf *)NULL, + tp->rcv_nxt, tp->snd_una - 1, 0); + (void) m_free(dtom(t_template)); + } + callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp); } else callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp); diff -u -r netinet.old/tcp_usrreq.c netinet/tcp_usrreq.c --- netinet.old/tcp_usrreq.c Mon Jun 18 20:45:07 2001 +++ netinet/tcp_usrreq.c Mon Jun 18 21:04:43 2001 @@ -749,11 +749,7 @@ inp->inp_fport = sin->sin_port; in_pcbrehash(inp); - tp->t_template = tcp_template(tp); - if (tp->t_template == 0) { - in_pcbdisconnect(inp); - return ENOBUFS; - } + tp->t_template = NULL; /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && @@ -841,11 +837,7 @@ inp->in6p_flowinfo = sin6->sin6_flowinfo; in_pcbrehash(inp); - tp->t_template = tcp_template(tp); - if (tp->t_template == 0) { - in6_pcbdisconnect(inp); - return ENOBUFS; - } + tp->t_template = NULL; /* Compute window scaling to request. */ while (tp->request_r_scale < TCP_MAX_WINSHIFT && diff -u -r netinet.old/tcp_var.h netinet/tcp_var.h --- netinet.old/tcp_var.h Mon Jun 18 20:45:07 2001 +++ netinet/tcp_var.h Tue Jun 19 11:27:44 2001 @@ -400,7 +400,8 @@ void tcp_setpersist __P((struct tcpcb *)); void tcp_slowtimo __P((void)); struct tcptemp * - tcp_template __P((struct tcpcb *)); + tcp_maketemplate __P((struct tcpcb *)); +void tcp_fillheaders __P((struct tcpcb *, void *, void *)); struct tcpcb * tcp_timers __P((struct tcpcb *, int)); void tcp_trace __P((int, int, struct tcpcb *, void *, struct tcphdr *,