Three bugs in the slirp code have an enormous adverse effect on
networking performance.

1. The maximum TCP segment size for data flowing from the VM to the
host is unnecessarily limited to 512 bytes. 1460 bytes is the
appropriate value for Ethernet.

2. TCP acknowledgements are being delayed unnecessarily, in violation
of the TCP Congestion Control RFC (2581). There is no reason to delay
TCP acknowledgements (and certainly no reason to give special
treatment to packets consisting of a single ESC character, as the code
does now!).

3. qemu sleeps soundly while packets back up in slirp's buffers. slirp
socket fds should be added to the main qemu select() loop to avoid
unnecessary delays.

As Ken Duda mentioned in an earlier thread, measurements with a simple
Python script indicate that the attached patch accelerates TCP
throughput from about 2 megabytes/sec to 9 megabytes/sec, in both
directions.

I'm sure many folks would benefit from this improvement; please let me
know if there is anything I can do to help nudge it into CVS.

--Ed
diff -BurN qemu-snapshot-2006-03-27_23.orig/slirp/tcp.h qemu-snapshot-2006-03-27_23/slirp/tcp.h
--- qemu-snapshot-2006-03-27_23.orig/slirp/tcp.h	2004-04-21 17:10:47.000000000 -0700
+++ qemu-snapshot-2006-03-27_23/slirp/tcp.h	2006-04-11 15:22:05.000000000 -0700
@@ -100,8 +100,10 @@
  * With an IP MSS of 576, this is 536,
  * but 512 is probably more convenient.
  * This should be defined as MIN(512, IP_MSS - sizeof (struct tcpiphdr)).
+ *
+ * We make this 1460 because we only care about Ethernet in the qemu context.
  */
-#define	TCP_MSS	512
+#define	TCP_MSS	1460
 
 #define	TCP_MAXWIN	65535	/* largest value for (unscaled) window */
 
diff -BurN qemu-snapshot-2006-03-27_23.orig/slirp/tcp_input.c qemu-snapshot-2006-03-27_23/slirp/tcp_input.c
--- qemu-snapshot-2006-03-27_23.orig/slirp/tcp_input.c	2004-10-07 16:27:35.000000000 -0700
+++ qemu-snapshot-2006-03-27_23/slirp/tcp_input.c	2006-04-11 15:22:05.000000000 -0700
@@ -580,28 +580,11 @@
 			 *	congestion avoidance sender won't send more until
 			 *	he gets an ACK.
 			 * 
-			 * Here are 3 interpretations of what should happen.
-			 * The best (for me) is to delay-ack everything except
-			 * if it's a one-byte packet containing an ESC
-			 * (this means it's an arrow key (or similar) sent using
-			 * Nagel, hence there will be no echo)
-			 * The first of these is the original, the second is the
-			 * middle ground between the other 2
+			 * It is better to not delay acks at all to maximize
+			 * TCP throughput.  See RFC 2581.
 			 */ 
-/*			if (((unsigned)ti->ti_len < tp->t_maxseg)) {
- */			     
-/*			if (((unsigned)ti->ti_len < tp->t_maxseg && 
- *			     (so->so_iptos & IPTOS_LOWDELAY) == 0) ||
- *			    ((so->so_iptos & IPTOS_LOWDELAY) && 
- *			     ((struct tcpiphdr_2 *)ti)->first_char == (char)27)) {
- */
-			if ((unsigned)ti->ti_len == 1 &&
-			    ((struct tcpiphdr_2 *)ti)->first_char == (char)27) {
-				tp->t_flags |= TF_ACKNOW;
-				tcp_output(tp);
-			} else {
-				tp->t_flags |= TF_DELACK;
-			}
+			tp->t_flags |= TF_ACKNOW;
+			tcp_output(tp);
 			return;
 		}
 	} /* header prediction */
diff -BurN qemu-snapshot-2006-03-27_23.orig/vl.c qemu-snapshot-2006-03-27_23/vl.c
--- qemu-snapshot-2006-03-27_23.orig/vl.c	2006-04-11 15:21:27.000000000 -0700
+++ qemu-snapshot-2006-03-27_23/vl.c	2006-04-11 15:22:05.000000000 -0700
@@ -4026,7 +4026,7 @@
 void main_loop_wait(int timeout)
 {
     IOHandlerRecord *ioh, *ioh_next;
-    fd_set rfds, wfds;
+    fd_set rfds, wfds, xfds;
     int ret, nfds;
     struct timeval tv;
 
@@ -4041,6 +4041,7 @@
     nfds = -1;
     FD_ZERO(&rfds);
     FD_ZERO(&wfds);
+    FD_ZERO(&xfds);
     for(ioh = first_io_handler; ioh != NULL; ioh = ioh->next) {
         if (ioh->fd_read &&
             (!ioh->fd_read_poll ||
@@ -4062,7 +4063,12 @@
 #else
     tv.tv_usec = timeout * 1000;
 #endif
-    ret = select(nfds + 1, &rfds, &wfds, NULL, &tv);
+#if defined(CONFIG_SLIRP)
+    if (slirp_inited) {
+        slirp_select_fill(&nfds, &rfds, &wfds, &xfds);
+    }
+#endif
+    ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv);
     if (ret > 0) {
         /* XXX: better handling of removal */
         for(ioh = first_io_handler; ioh != NULL; ioh = ioh_next) {

_______________________________________________
Qemu-devel mailing list
Qemu-devel@nongnu.org
http://lists.nongnu.org/mailman/listinfo/qemu-devel

Reply via email to