Replace spinning send_all() with a proper non-blocking send. When the socket write buffer limit is reached, we should stop trying to send and wait for the socket to become writable again.
Non-blocking TCP sockets can return in two different ways when the write
buffer limit is reached:

1. ret = -1 and errno = EAGAIN/EWOULDBLOCK.  No data has been written.

2. ret < total_size.  Short write, only part of the message was
   transmitted.

Handle both cases and keep track of how many bytes have been written in
s->send_index.  (This includes the 'length' header before the actual
payload buffer.)

Signed-off-by: Stefan Hajnoczi <stefa...@linux.vnet.ibm.com>
---
 net/socket.c |   57 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 5 deletions(-)

diff --git a/net/socket.c b/net/socket.c
index e5e4e8d..be44105 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -40,6 +40,7 @@ typedef struct NetSocketState {
     int state; /* 0 = getting length, 1 = getting data */
     unsigned int index;
     unsigned int packet_len;
+    unsigned int send_index;      /* number of bytes sent (only SOCK_STREAM) */
     uint8_t buf[4096];
     struct sockaddr_in dgram_dst; /* contains inet host and port destination iff connectionless (SOCK_DGRAM) */
     IOHandler *send_fn;           /* differs between SOCK_STREAM/SOCK_DGRAM */
@@ -88,15 +89,61 @@ static void net_socket_writable(void *opaque)
     qemu_flush_queued_packets(&s->nc);
 }
 
-/* XXX: we consider we can send the whole packet without blocking */
+/*
+ * Send one packet on a SOCK_STREAM socket: a 4-byte network-order length
+ * header followed by the payload.  s->send_index counts the header and
+ * payload bytes already written by an earlier partial send of this packet.
+ * Returns size when everything was written, 0 after a short write or EAGAIN
+ * (write polling is enabled first), or -errno on any other failure.
+ */
 static ssize_t net_socket_receive(NetClientState *nc, const uint8_t *buf, size_t size)
 {
     NetSocketState *s = DO_UPCAST(NetSocketState, nc, nc);
-    uint32_t len;
-    len = htonl(size);
+    struct iovec iov[2];
+    int iovcnt = 0;
+    ssize_t ret;
+    ssize_t total_size = 0;
+    uint32_t len = htonl(size);
+    unsigned int buf_index;
+
+    /* Length header */
+    if (s->send_index < sizeof(len)) {
+        iov[iovcnt].iov_base = (uint8_t *)&len + s->send_index;
+        iov[iovcnt].iov_len = sizeof(len) - s->send_index;
+        total_size += iov[iovcnt].iov_len;
+        iovcnt++;
+
+        buf_index = 0;
+    } else {
+        buf_index = s->send_index - sizeof(len);
+    }
+
+    assert(buf_index < size);
+
+    /* Payload buffer */
+    iov[iovcnt].iov_base = (uint8_t *)buf + buf_index;
+    iov[iovcnt].iov_len = size - buf_index;
+    total_size += iov[iovcnt].iov_len;
+    iovcnt++;
+
+    do {
+        ret = writev(s->fd, iov, iovcnt);
+    } while (ret == -1 && errno == EINTR);
 
-    send_all(s->fd, (const uint8_t *)&len, sizeof(len));
-    return send_all(s->fd, buf, size);
+    if (ret == -1 && errno == EAGAIN) {
+        ret = 0; /* handled further down */
+    }
+    if (ret == -1) {
+        s->send_index = 0;
+        return -errno;
+    }
+    if (ret < total_size) {
+        s->send_index += ret;
+        net_socket_write_poll(s, true);
+        return 0;
+    }
+    s->send_index = 0;
+    return size;
 }
 
 static ssize_t net_socket_receive_dgram(NetClientState *nc, const uint8_t *buf, size_t size)
-- 
1.7.10.4