On Wed, Mar 26, 2014 at 11:08:41AM +0000, anton.iva...@kot-begemot.co.uk wrote:
> +static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
> +                                            const struct iovec *iov,
> +                                            int iovcnt)
> +{
> +    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> +
> +    struct msghdr message;
> +    int ret;
> +
> +    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
> +        error_report(
> +            "iovec too long %d > %d, change l2tpv3.h",
> +            iovcnt, MAX_L2TPV3_IOVCNT
> +        );
> +        return -1;
> +    }

The alternative is to linearize the buffer using iov_to_buf() and call
net_l2tpv3_receive_dgram().  Something like:

    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
        size_t size = iov_size(iov, iovcnt);
        uint8_t *buf;

        buf = g_malloc(size);
        iov_to_buf(iov, iovcnt, 0, buf, size);
        ret = net_l2tpv3_receive_dgram(nc, buf, size);
        g_free(buf);
        return ret;
    }

> +    l2tpv3_form_header(s);
> +    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
> +    s->vec->iov_base = s->header_buf;
> +    s->vec->iov_len = s->offset;
> +    message.msg_name = s->dgram_dst;
> +    message.msg_namelen = s->dst_size;
> +    message.msg_iov = s->vec;
> +    message.msg_iovlen = iovcnt + 1;
> +    message.msg_control = NULL;
> +    message.msg_controllen = 0;
> +    message.msg_flags = 0;
> +    do {
> +        ret = sendmsg(s->fd, &message, 0);
> +    } while ((ret == -1) && (errno == EINTR));
> +    if (ret > 0) {
> +        ret -= s->offset;
> +    } else if (ret == 0) {
> +        /* belt and braces - should not occur on DGRAM
> +        * we should get an error and never a 0 send
> +        */

Space missing before '*'

> +        ret = iov_size(iov, iovcnt);
> +    } else {
> +        /* signal upper layer that socket buffer is full */
> +        ret = -errno;
> +        if (ret == -EAGAIN || ret == -ENOBUFS) {
> +            l2tpv3_write_poll(s, true);
> +            ret = 0;
> +        }
> +    }
> +    return ret;
> +}
> +
> +static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
> +                                        const uint8_t *buf,
> +                                        size_t size)
> +{
> +    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
> +
> +    struct iovec *vec;
> +    struct msghdr message;
> +    ssize_t ret = 0;
> +
> +    l2tpv3_form_header(s);
> +    vec = s->vec;
> +    vec->iov_base = s->header_buf;
> +    vec->iov_len = s->offset;
> +    vec++;
> +    vec->iov_base = (void *) buf;
> +    vec->iov_len = size;
> +    message.msg_name = s->dgram_dst;
> +    message.msg_namelen = s->dst_size;
> +    message.msg_iov = s->vec;
> +    message.msg_iovlen = 2;
> +    message.msg_control = NULL;
> +    message.msg_controllen = 0;
> +    message.msg_flags = 0;
> +    do {
> +        ret = sendmsg(s->fd, &message, 0);
> +    } while ((ret == -1) && (errno == EINTR));
> +    if (ret > 0) {
> +        ret -= s->offset;
> +    } else if (ret == 0) {
> +        /* belt and braces - should not occur on DGRAM
> +        * we should get an error and never a 0 send
> +        */
> +        ret = size;
> +    } else {
> +        ret = -errno;
> +        if (ret == -EAGAIN || ret == -ENOBUFS) {
> +            /* signal upper layer that socket buffer is full */
> +            l2tpv3_write_poll(s, true);
> +            ret = 0;
> +        }
> +    }
> +    return ret;
> +}

This function duplicates code, how about:

static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
                                        const uint8_t *buf,
                                        size_t size)
{
    struct iovec iov = {
        .iov_base = buf,
        .iov_len = size,
    };

    return net_l2tpv3_receive_dgram_iov(nc, &iov, 1);
}

> +static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
> +{
> +
> +    uint32_t *session;
> +    uint64_t cookie;
> +
> +    if ((!s->udp) && (!s->ipv6)) {
> +        buf += sizeof(struct iphdr) /* fix for ipv4 raw */;
> +    }
> +
> +    /* we do not do a strict check for "data" packets as per
> +     * the RFC spec because the pure IP spec does not have
> +     * that anyway.
> +     */
> +
> +    if (s->cookie) {
> +        if (s->cookie_is_64) {
> +            cookie = ldq_be_p(buf + s->cookie_offset);
> +        } else {
> +            cookie = ldl_be_p(buf + s->cookie_offset);
> +        }
> +        if (cookie != s->rx_cookie) {
> +            if (!s->header_mismatch) {
> +                error_report("unknown cookie id\n");

error_report() messages should not have a newline ('\n') at the end.
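
In other words, just:

    error_report("unknown cookie id");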

> +            }
> +            return -1;
> +        }
> +    }
> +    session = (uint32_t *) (buf + s->session_offset);
> +    if (ldl_be_p(session) != s->rx_session) {
> +        if (!s->header_mismatch) {
> +            error_report("session mismatch");
> +        }
> +        return -1;
> +    }
> +    return 0;
> +}
> +
> +static void net_l2tpv3_process_queue(NetL2TPV3State *s)
> +{
> +    int size = 0;
> +    struct iovec *vec;
> +    bool bad_read;
> +    int data_size;
> +    struct mmsghdr *msgvec;
> +
> +    /* go into ring mode only if there is a "pending" tail */
> +    if (s->queue_depth > 0 && (!s->delivering)) {
> +        do {
> +            msgvec = s->msgvec + s->queue_tail;
> +            if (msgvec->msg_len > 0) {
> +                data_size = msgvec->msg_len - s->header_size;

header_size is never assigned to.

> +                vec = msgvec->msg_hdr.msg_iov;
> +                if ((data_size > 0) &&
> +                    (l2tpv3_verify_header(s, vec->iov_base) == 0)) {
> +                    vec++;
> +                    /* we do not use the "normal" send and send async
> +                     * functions here because we have our own buffer -
> +                     * the message vector. If we use the "usual" ones
> +                     * we will end up double-buffering.
> +                     */
> +                    s->delivering = true;

What is s->delivering guarding against?  This function is only called
from the s->fd read handler function, so I don't see a reentrant code
path.

> +                    /* deliver directly to peer bypassing queueing and
> +                     * buffering
> +                     */
> +                    size = qemu_deliver_packet(
> +                            &s->nc,
> +                            QEMU_NET_PACKET_FLAG_NONE,
> +                            vec->iov_base,
> +                            data_size,
> +                            s->nc.peer
> +                    );

There is no callback when the peer re-enables receive.  In other words,
packets will be stuck in s->msgvec until a new packet arrives and
net_l2tpv3_send() is invoked.

I think you need to modify qemu_flush_queued_packets() to invoke a new
callback.  You could convert the existing code:

    if (nc->peer && nc->peer->info->type == NET_CLIENT_OPTIONS_KIND_HUBPORT) {
        if (net_hub_flush(nc->peer)) {
            qemu_notify_event();
        }
    }

to:

    if (nc->peer && nc->peer->info->peer_flushed) {
        if (nc->peer->info->peer_flushed(nc->peer)) {
            qemu_notify_event();
        }
    }

and move the hub flushing code into net/hub.c.  Then you could also
implement ->peer_flushed() in l2tpv3.c so that it calls
net_l2tpv3_process_queue() and returns true if packets were delivered.

Please introduce ->peer_flushed() in a separate patch.

Or you could keep it simple and go back to using qemu_send_packet()
without trying to avoid duplicate buffering.  (You can always send
patches later that eliminate the buffering.)
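
For what it's worth, here is a rough sketch of what the l2tpv3 side of the
->peer_flushed() approach could look like.  It is purely illustrative: it
assumes the new ->peer_flushed() member described above has been added to
NetClientInfo, that net_l2tpv3_process_queue() decrements s->queue_depth as
queued entries are consumed, and the function name is made up:

    /* Called via the proposed NetClientInfo ->peer_flushed() hook when the
     * peer re-enables receive.  Retries delivery of the packets still
     * parked in s->msgvec and reports whether any of them went out.
     */
    static bool l2tpv3_peer_flushed(NetClientState *nc)
    {
        NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
        uint32_t old_depth = s->queue_depth;

        net_l2tpv3_process_queue(s);

        return s->queue_depth != old_depth;
    }

Wiring that into the l2tpv3 NetClientInfo would let
qemu_flush_queued_packets() kick the backend exactly when the peer can
receive again, instead of leaving the queued packets waiting for the next
incoming datagram.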