Three bugs in the slirp code have an enormous adverse effect on networking performance.
1. The maximum TCP segment size for data flowing from the VM to the host is unnecessarily limited to 512 bytes. 1460 bytes is the appropriate value for Ethernet. 2. TCP acknowledgements are being delayed unnecessarily, in violation of the TCP Congestion Control RFC (2581). There is no reason to delay TCP acknowledgements (and certainly no reason to give special treatment to packets consisting of a single ESC character, as the code does now!). 3. qemu sleeps soundly while packets back up in slirp's buffers. slirp socket fds should be added to the main qemu select() loop to avoid unnecessary delays. As Ken Duda mentioned in an earlier thread, measurements with a simple Python script indicate that the attached patch accelerates TCP throughput from about 2 megabytes/sec to 9 megabytes/sec, in both directions. I'm sure many folks would benefit from this improvement; please let me know if there is anything I can do to help nudge it into CVS. --Ed
diff -BurN qemu-snapshot-2006-03-27_23.orig/slirp/tcp.h qemu-snapshot-2006-03-27_23/slirp/tcp.h --- qemu-snapshot-2006-03-27_23.orig/slirp/tcp.h 2004-04-21 17:10:47.000000000 -0700 +++ qemu-snapshot-2006-03-27_23/slirp/tcp.h 2006-04-11 15:22:05.000000000 -0700 @@ -100,8 +100,10 @@ * With an IP MSS of 576, this is 536, * but 512 is probably more convenient. * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)). + * + * We make this 1460 because we only care about Ethernet in the qemu context. */ -#define TCP_MSS 512 +#define TCP_MSS 1460 #define TCP_MAXWIN 65535 /* largest value for (unscaled) window */ diff -BurN qemu-snapshot-2006-03-27_23.orig/slirp/tcp_input.c qemu-snapshot-2006-03-27_23/slirp/tcp_input.c --- qemu-snapshot-2006-03-27_23.orig/slirp/tcp_input.c 2004-10-07 16:27:35.000000000 -0700 +++ qemu-snapshot-2006-03-27_23/slirp/tcp_input.c 2006-04-11 15:22:05.000000000 -0700 @@ -580,28 +580,11 @@ * congestion avoidance sender won't send more until * he gets an ACK. * - * Here are 3 interpretations of what should happen. - * The best (for me) is to delay-ack everything except - * if it's a one-byte packet containing an ESC - * (this means it's an arrow key (or similar) sent using - * Nagel, hence there will be no echo) - * The first of these is the original, the second is the - * middle ground between the other 2 + * It is better to not delay acks at all to maximize + * TCP throughput. See RFC 2581. */ -/* if (((unsigned)ti->ti_len < tp->t_maxseg)) { - */ -/* if (((unsigned)ti->ti_len < tp->t_maxseg && - * (so->so_iptos & IPTOS_LOWDELAY) == 0) || - * ((so->so_iptos & IPTOS_LOWDELAY) && - * ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) { - */ - if ((unsigned)ti->ti_len == 1 && - ((struct tcpiphdr_2 *)ti)->first_char == (char)27) { - tp->t_flags |= TF_ACKNOW; - tcp_output(tp); - } else { - tp->t_flags |= TF_DELACK; - } + tp->t_flags |= TF_ACKNOW; + tcp_output(tp); return; } } /* header prediction */ diff -BurN qemu-snapshot-2006-03-27_23.orig/vl.c qemu-snapshot-2006-03-27_23/vl.c --- qemu-snapshot-2006-03-27_23.orig/vl.c 2006-04-11 15:21:27.000000000 -0700 +++ qemu-snapshot-2006-03-27_23/vl.c 2006-04-11 15:22:05.000000000 -0700 @@ -4026,7 +4026,7 @@ void main_loop_wait(int timeout) { IOHandlerRecord *ioh, *ioh_next; - fd_set rfds, wfds; + fd_set rfds, wfds, xfds; int ret, nfds; struct timeval tv; @@ -4041,6 +4041,7 @@ nfds = -1; FD_ZERO(&rfds); FD_ZERO(&wfds); + FD_ZERO(&xfds); for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) { if (ioh->fd_read && (!ioh->fd_read_poll || @@ -4062,7 +4063,12 @@ #else tv.tv_usec = timeout * 1000; #endif - ret = select(nfds + 1, &rfds, &wfds, NULL, &tv); +#if defined(CONFIG_SLIRP) + if (slirp_inited) { + slirp_select_fill(&nfds, &rfds, &wfds, &xfds); + } +#endif + ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv); if (ret > 0) { /* XXX: better handling of removal */ for(ioh = first_io_handler; ioh != NULL; ioh = ioh_next) {
_______________________________________________ Qemu-devel mailing list Qemu-devel@nongnu.org http://lists.nongnu.org/mailman/listinfo/qemu-devel