Hi, I changed the Slirp output path to use vectored IO to avoid the slowdown from memcpy (see the attached work-in-progress patch; it gives a small performance improvement). But then I got the idea that AIO would be nice at the outgoing end of the network IO processing. In fact, a vectored AIO model could even be used for generic DMA! The benefit is that no buffering or copying should be needed.
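To make the copy-avoidance concrete, here is a minimal sketch (not part of the patch; the function name is made up) of the same idea at the host boundary, using the standard writev(2) interface:

    #include <sys/uio.h>

    /* Instead of memcpy'ing header + payload into one staging buffer and
       calling write(), describe the fragments in place and let the kernel
       gather them in a single syscall. */
    static ssize_t send_frame_vectored(int fd, void *hdr, size_t hdr_len,
                                       void *payload, size_t payload_len)
    {
        struct iovec iov[2];

        iov[0].iov_base = hdr;
        iov[0].iov_len = hdr_len;
        iov[1].iov_base = payload;
        iov[1].iov_len = payload_len;
        return writev(fd, iov, 2);
    }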
Instead of

    void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                                int len, int is_write);

and its device variant, we'd have something like

    int qemu_lio_listio(int mode, struct GenericAIOcb *list[],
                        unsigned int nent, IOCompletionFunc *cb);

Each stage would translate the IO list and callback as needed, and only the final stage would perform the actual IO or memcpy. This would be used at each stage of the chain memory <-> IOMMU <-> device <-> SLIRP <-> host network device. Of course, some kind of host support for vectored AIO on these devices is required. On the target side, devices that can do scatter/gather DMA would benefit most. For the specific Sparc32 case, Lance bus byte swapping unfortunately makes buffering necessary at that stage, unless we can make N single-byte vectors faster than a memcpy + bswap of an N-byte block. Comments?
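To sketch how the chaining could work (only the qemu_lio_listio prototype above is the actual proposal; the control-block layout and field names below are my guesses, purely for illustration):

    #include <stddef.h>

    /* Hypothetical control block: each stage rewrites base/len in place,
       e.g. the IOMMU stage translates guest-physical addresses to host
       pointers, then hands the same list to the next stage. */
    struct GenericAIOcb {
        void *aio_buf;      /* address in the current stage's address space */
        size_t aio_len;
        void *stage_opaque; /* per-stage private state */
    };

    typedef void IOCompletionFunc(void *opaque, int ret);

    int qemu_lio_listio(int mode, struct GenericAIOcb *list[],
                        unsigned int nent, IOCompletionFunc *cb);

Only the last stage in the chain (here the host network device) would issue real vectored AIO; the intermediate stages never touch the data, so a packet could go from guest memory to the host socket with zero copies.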
Index: qemu/vl.c
===================================================================
--- qemu.orig/vl.c	2007-10-27 07:06:30.000000000 +0000
+++ qemu/vl.c	2007-10-27 11:06:09.000000000 +0000
@@ -1540,8 +1540,46 @@
 }
 
 /***********************************************************/
-/* character device */
+/* Helpers for vectored IO */
+
+static void qemu_readv_with_read(void *opaque, IOReadHandler *fd_read,
+                                 const struct qemu_iovec *vector, int count)
+{
+#if 1
+    int i, currlen = 0;
+    char buf[8192];
+
+    if (fd_read) {
+        for (i = 0; i < count; i++) {
+            if (currlen + vector[i].iov_len < sizeof(buf)) {
+                memcpy(&buf[currlen], vector[i].iov_base, vector[i].iov_len);
+                currlen += vector[i].iov_len;
+            } else /* WIP: oversized packets are truncated */
+                fprintf(stderr, "bad currlen %d iov.len %zu\n", currlen, vector[i].iov_len);
+        }
+        fd_read(opaque, buf, currlen);
+    }
+#else
+    int i;
+
+    if (fd_read)
+        for (i = 0; i < count; i++)
+            fd_read(opaque, vector[i].iov_base, vector[i].iov_len);
+#endif
+}
+
+static void qemu_read_with_readv(void *opaque, IOReadvHandler *fd_readv,
+                                 const uint8_t *buf, int size)
+{
+    struct qemu_iovec iov;
+
+    iov.iov_base = (void *)buf; /* cast away const: iov_base is shared with the write path */
+    iov.iov_len = size;
+    if (fd_readv)
+        fd_readv(opaque, &iov, 1);
+}
+
+/***********************************************************/
+/* character device */
 
 static void qemu_chr_event(CharDriverState *s, int event)
 {
     if (!s->chr_event)
@@ -3573,6 +3611,18 @@
     return vc;
 }
 
+VLANClientState *qemu_new_vlan_client_iov(VLANState *vlan,
+                                          IOReadvHandler *fd_readv,
+                                          IOCanRWHandler *fd_can_read,
+                                          void *opaque)
+{
+    VLANClientState *vc;
+
+    vc = qemu_new_vlan_client(vlan, NULL, fd_can_read, opaque);
+    vc->fd_readv = fd_readv;
+    return vc;
+}
+
 int qemu_can_send_packet(VLANClientState *vc1)
 {
     VLANState *vlan = vc1->vlan;
@@ -3598,7 +3648,26 @@
 #endif
     for(vc = vlan->first_client; vc != NULL; vc = vc->next) {
         if (vc != vc1) {
-            vc->fd_read(vc->opaque, buf, size);
+            if (vc->fd_read)
+                vc->fd_read(vc->opaque, buf, size);
+            else if (vc->fd_readv)
+                qemu_read_with_readv(vc->opaque, vc->fd_readv, buf, size);
+        }
+    }
+}
+
+void qemu_send_packet_iov(VLANClientState *vc1, const struct qemu_iovec *vector,
+                          int count)
+{
+    VLANState *vlan = vc1->vlan;
+    VLANClientState *vc;
+
+    for(vc = vlan->first_client; vc != NULL; vc = vc->next) {
+        if (vc != vc1) {
+            if (vc->fd_readv)
+                vc->fd_readv(vc->opaque, vector, count);
+            else if (vc->fd_read)
+                qemu_readv_with_read(vc->opaque, vc->fd_read, vector, count);
         }
     }
 }
@@ -3626,6 +3695,13 @@
     qemu_send_packet(slirp_vc, pkt, pkt_len);
 }
 
+void slirp_output_iov(const struct qemu_iovec *vector, int count)
+{
+    if (!slirp_vc)
+        return;
+    qemu_send_packet_iov(slirp_vc, vector, count);
+}
+
 static void slirp_receive(void *opaque, const uint8_t *buf, int size)
 {
 #if 0
@@ -4944,13 +5020,12 @@
 
 static IOHandlerRecord *first_io_handler;
 
-/* XXX: fd_read_poll should be suppressed, but an API change is
-   necessary in the character devices to suppress fd_can_read(). */
-int qemu_set_fd_handler2(int fd,
-                         IOCanRWHandler *fd_read_poll,
-                         IOHandler *fd_read,
-                         IOHandler *fd_write,
-                         void *opaque)
+static IOHandlerRecord *
+qemu_set_fd_handler3(int fd,
+                     IOCanRWHandler *fd_read_poll,
+                     IOHandler *fd_read,
+                     IOHandler *fd_write,
+                     void *opaque)
 {
     IOHandlerRecord **pioh, *ioh;
 
@@ -4973,17 +5048,38 @@
     }
     ioh = qemu_mallocz(sizeof(IOHandlerRecord));
     if (!ioh)
-        return -1;
+        return NULL;
     ioh->next = first_io_handler;
     first_io_handler = ioh;
 found:
     ioh->fd = fd;
     ioh->fd_read_poll = fd_read_poll;
     ioh->fd_read = fd_read;
+#if 0
+    if (!fd_read)
+        ioh->fd_readv = NULL;
+#endif
     ioh->fd_write = fd_write;
     ioh->opaque = opaque;
     ioh->deleted = 0;
     }
+    return ioh;
+}
+
+/* XXX: fd_read_poll should be suppressed, but an API change is
+   necessary in the character devices to suppress fd_can_read(). */
+int qemu_set_fd_handler2(int fd,
+                         IOCanRWHandler *fd_read_poll,
+                         IOHandler *fd_read,
+                         IOHandler *fd_write,
+                         void *opaque)
+{
+    IOHandlerRecord *ioh;
+
+    /* pass fd_read_poll through so existing rate-limiting keeps working */
+    ioh = qemu_set_fd_handler3(fd, fd_read_poll, fd_read, fd_write, opaque);
+    if (!ioh)
+        return -1;
     return 0;
 }
 
@@ -4995,6 +5091,25 @@
     return qemu_set_fd_handler2(fd, NULL, fd_read, fd_write, opaque);
 }
 
+#if 0
+int qemu_set_fd_handler_iov(int fd,
+                            IOHandler *fd_readv,
+                            IOHandler *fd_writev,
+                            void *opaque)
+{
+    IOHandlerRecord *ioh;
+
+    ioh = qemu_set_fd_handler3(fd, NULL, NULL, NULL, opaque);
+    if (!ioh)
+        return -1;
+
+    ioh->fd_readv = fd_readv;
+    ioh->fd_writev = fd_writev;
+
+    return 0;
+}
+#endif
+
 /***********************************************************/
 /* Polling handling */
Index: qemu/vl.h
===================================================================
--- qemu.orig/vl.h	2007-10-27 07:11:15.000000000 +0000
+++ qemu/vl.h	2007-10-27 08:03:00.000000000 +0000
@@ -263,6 +263,12 @@
 /* async I/O support */
 
 typedef void IOReadHandler(void *opaque, const uint8_t *buf, int size);
+struct qemu_iovec {
+    void *iov_base;
+    size_t iov_len;
+};
+typedef void IOReadvHandler(void *opaque, const struct qemu_iovec *vector,
+                            int count);
 typedef int IOCanRWHandler(void *opaque);
 typedef void IOHandler(void *opaque);
 
@@ -275,6 +281,10 @@
                          IOHandler *fd_read,
                          IOHandler *fd_write,
                          void *opaque);
+int qemu_set_fd_handler_iov(int fd,
+                            IOHandler *fd_readv,
+                            IOHandler *fd_writev,
+                            void *opaque);
 
 /* Polling handling */
 
@@ -396,6 +406,7 @@
 
 struct VLANClientState {
     IOReadHandler *fd_read;
+    IOReadvHandler *fd_readv;
     /* Packets may still be sent if this returns zero.  It's used to
        rate-limit the slirp code. */
     IOCanRWHandler *fd_can_read;
@@ -417,8 +428,14 @@
                                       IOReadHandler *fd_read,
                                       IOCanRWHandler *fd_can_read,
                                       void *opaque);
+VLANClientState *qemu_new_vlan_client_iov(VLANState *vlan,
+                                          IOReadvHandler *fd_readv,
+                                          IOCanRWHandler *fd_can_read,
+                                          void *opaque);
 int qemu_can_send_packet(VLANClientState *vc);
 void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size);
+void qemu_send_packet_iov(VLANClientState *vc, const struct qemu_iovec *vector,
+                          int count);
 void qemu_handler_true(void *opaque);
 
 void do_info_network(void);
Index: qemu/slirp/libslirp.h
===================================================================
--- qemu.orig/slirp/libslirp.h	2007-10-27 07:06:13.000000000 +0000
+++ qemu/slirp/libslirp.h	2007-10-27 08:18:10.000000000 +0000
@@ -17,6 +17,8 @@
 /* you must provide the following functions: */
 int slirp_can_output(void);
 void slirp_output(const uint8_t *pkt, int pkt_len);
+struct qemu_iovec;
+void slirp_output_iov(const struct qemu_iovec *vector, int count);
 
 int slirp_redir(int is_udp, int host_port,
                 struct in_addr guest_addr, int guest_port);
Index: qemu/slirp/slirp.c
===================================================================
--- qemu.orig/slirp/slirp.c	2007-10-27 07:06:19.000000000 +0000
+++ qemu/slirp/slirp.c	2007-10-27 10:44:18.000000000 +0000
@@ -636,19 +636,22 @@
 /* output the IP packet to the ethernet device */
 void if_encap(const uint8_t *ip_data, int ip_data_len)
 {
-    uint8_t buf[1600];
-    struct ethhdr *eh = (struct ethhdr *)buf;
-
-    if (ip_data_len + ETH_HLEN > sizeof(buf))
-        return;
+    struct ethhdr buf, *eh = &buf;
+    /* layout must match struct qemu_iovec, which is only
+       forward-declared for slirp */
+    struct {
+        void *iov_base;
+        size_t iov_len;
+    } iov[2];
 
     memcpy(eh->h_dest, client_ethaddr, ETH_ALEN);
     memcpy(eh->h_source, special_ethaddr, ETH_ALEN - 1);
     /* XXX: not correct */
     eh->h_source[5] = CTL_ALIAS;
     eh->h_proto = htons(ETH_P_IP);
-    memcpy(buf + sizeof(struct ethhdr), ip_data, ip_data_len);
-    slirp_output(buf, ip_data_len + ETH_HLEN);
+    iov[0].iov_base = eh;
+    iov[0].iov_len = sizeof(struct ethhdr);
+    iov[1].iov_base = (void *)ip_data;
+    iov[1].iov_len = ip_data_len;
+    slirp_output_iov((const struct qemu_iovec *)iov, 2);
 }
 
 int slirp_redir(int is_udp, int host_port,
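For reference, a NIC model could opt in to the new interfaces roughly like this (the mynic_* names are hypothetical; only qemu_new_vlan_client_iov, qemu_send_packet_iov and struct qemu_iovec come from the patch, and the fragment assumes vl.h is included):

    static void mynic_receive_iov(void *opaque, const struct qemu_iovec *vector,
                                  int count)
    {
        /* DMA each fragment straight into guest memory, no staging buffer */
    }

    static int mynic_can_receive(void *opaque)
    {
        return 1;
    }

    /* Transmit side: hand header and payload fragments to the VLAN
       without flattening them into one buffer first. */
    static void mynic_transmit(VLANClientState *vc, void *hdr, size_t hdr_len,
                               void *payload, size_t payload_len)
    {
        struct qemu_iovec iov[2];

        iov[0].iov_base = hdr;
        iov[0].iov_len = hdr_len;
        iov[1].iov_base = payload;
        iov[1].iov_len = payload_len;
        qemu_send_packet_iov(vc, iov, 2);
    }

    /* registration:
       vc = qemu_new_vlan_client_iov(vlan, mynic_receive_iov,
                                     mynic_can_receive, mynic_state); */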