Attached is a revised version.

* I am still keeping sendmsg instead of iov_send because I have not had the time to review udp.c in the kernel and do the relevant regression testing to see whether connect() still breaks bind() on multihomed hosts as it did in 2.6. We can revisit that at a later date; without doing the proper investigation I am not comfortable trying this.
* I have completely removed parse6 and replaced it with a common getaddrinfo() call whose address family is selected by the boolean flags. This also makes it possible to force a specific v4 or v6 address choice for dst.
* Addresses are now specified separately from ports, and ports are deliberately strings so you can specify them as a protocol (service) name.
* The mode bitmask is gone; it is all booleans now - both for option invocation and internally in the code.
* I have added the extra offset back in, so it is feature-for-feature compatible with the Linux kernel implementation.
* All mallocs are now to exact size.
* Indentation, style, debug, etc. are all as requested now.
* l2tpv3.h has become surplus to requirements and is gone now.
* It now has proper cleanup.
* I have tested it for a few setups (mostly v4) and it works as advertised. I need to rewrite my test scripts for the new option names to give it a full test. I would not expect that to show any problems though - the core send/receive logic is unchanged from the original version.
* I have not yet addressed the page size and page alignment of the buffers - leaving that open for an RFC on how to get the maximum performance there and how to make it expandable later, so one can re-configure it for large MTU/jumbo frames.

Example magic incantation to invoke it with the new options:

#!/bin/sh
kvm -hda kvm.img -m 1024 \
    -net nic,vlan=0,model=virtio,macaddr=0a:98:fc:96:83:01 \
    -net l2tpv3,vlan=0,udp,src=192.168.63.1,srcport=1700,dst=192.168.63.1,dstport=1701,cookie64,txcookie=0x0123456789abcdef,rxcookie=0xfedcba9876543210,rxsession=0xffffffff,txsession=0xffffffff,counter

A.
diff --git a/net/Makefile.objs b/net/Makefile.objs
index 4854a14..160214e 100644
--- a/net/Makefile.objs
+++ b/net/Makefile.objs
@@ -2,6 +2,7 @@ common-obj-y = net.o queue.o checksum.o util.o hub.o
 common-obj-y += socket.o
 common-obj-y += dump.o
 common-obj-y += eth.o
+common-obj-$(CONFIG_LINUX) += l2tpv3.o
 common-obj-$(CONFIG_POSIX) += tap.o
 common-obj-$(CONFIG_LINUX) += tap-linux.o
 common-obj-$(CONFIG_WIN32) += tap-win32.o
diff --git a/net/clients.h b/net/clients.h
index 7793294..bbf177c 100644
--- a/net/clients.h
+++ b/net/clients.h
@@ -47,6 +47,8 @@ int net_init_tap(const NetClientOptions *opts, const char *name,
 int net_init_bridge(const NetClientOptions *opts, const char *name,
                     NetClientState *peer);
 
+int net_init_l2tpv3(const NetClientOptions *opts, const char *name,
+                    NetClientState *peer);
 #ifdef CONFIG_VDE
 int net_init_vde(const NetClientOptions *opts, const char *name,
                  NetClientState *peer);
diff --git a/net/l2tpv3.c b/net/l2tpv3.c
new file mode 100644
index 0000000..302fc1d
--- /dev/null
+++ b/net/l2tpv3.c
@@ -0,0 +1,527 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2012-2014 Cisco Systems
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <netdb.h>
+#include "config-host.h"
+#include "net/net.h"
+#include "clients.h"
+#include "monitor/monitor.h"
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+/* Size and alignment of each receive payload buffer */
+#define PAGE_SIZE 4096
+/* iovecs per received message: [0] header, [1] payload */
+#define IOVSIZE 2
+/* Maximum number of datagrams fetched by one recvmmsg() call */
+#define MAX_L2TPV3_MSGCNT 32
+#define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE)
+
+#ifndef IPPROTO_L2TP
+#define IPPROTO_L2TP 0x73
+#endif
+
+/* First 32-bit header word in UDP mode (RFC 3931: data message, version 3) */
+#define L2TPV3_DATA_PACKET 0x30000
+
+typedef struct NetL2TPV3State {
+    NetClientState nc;
+    int fd;
+    int state;
+    unsigned int index;
+    unsigned int packet_len;
+
+    /*
+     * Used for transmit, which happens one packet at a time:
+     * header_buf holds the outgoing header and vec is the
+     * scatter/gather list handed to sendmsg().
+     */
+    uint8_t *header_buf;
+    struct iovec *vec;
+
+    /* Used for receive - up to MAX_L2TPV3_MSGCNT packets at a time */
+    struct mmsghdr *msgvec;
+
+    /* Peer address */
+    struct sockaddr_storage *dgram_dst;
+    uint32_t dst_size;
+
+    /* L2TPv3 parameters */
+    uint64_t rx_cookie;
+    uint64_t tx_cookie;
+    uint32_t rx_session;
+    uint32_t tx_session;
+    uint32_t header_size;
+    uint32_t counter;
+
+    /*
+     * Header layout computed in net_init_l2tpv3(): offset is the total
+     * header length, the others are byte offsets of the fields.
+     */
+    uint32_t offset;
+    uint32_t cookie_offset;
+    uint32_t counter_offset;
+    uint32_t session_offset;
+
+    /* Flags */
+    bool ipv6;
+    bool udp;
+    bool nocounter;
+    bool cookie;
+    bool cookie_is_64;
+} NetL2TPV3State;
+
+/*
+ * Fill s->header_buf with the transmit header for the next packet:
+ * the UDP data-message word (UDP mode only), the TX session id, the
+ * optional TX cookie and, unless disabled, the pre-incremented counter.
+ * All fields are stored big-endian. Always returns 0.
+ */
+static int l2tpv3_form_header(NetL2TPV3State *s)
+{
+    if (s->udp) {
+        stl_be_p(s->header_buf, L2TPV3_DATA_PACKET);
+    }
+    stl_be_p(s->header_buf + s->session_offset, s->tx_session);
+
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            stq_be_p(s->header_buf + s->cookie_offset, s->tx_cookie);
+        } else {
+            stl_be_p(s->header_buf + s->cookie_offset, s->tx_cookie);
+        }
+    }
+
+    if (!s->nocounter) {
+        stl_be_p(s->header_buf + s->counter_offset, ++s->counter);
+    }
+    return 0;
+}
+
+/*
+ * Convert a sendmsg() result into the transmit callbacks' return value:
+ * -errno on failure, otherwise the bytes sent minus the header length.
+ * A zero-byte send is reported as payload_len, as the code always did.
+ * Must be called before anything else can overwrite errno.
+ */
+static ssize_t l2tpv3_tx_result(NetL2TPV3State *s, ssize_t sent,
+                                size_t payload_len)
+{
+    if (sent < 0) {
+        return -errno;
+    }
+    if (sent == 0) {
+        return payload_len;
+    }
+    return sent - (ssize_t) s->offset;
+}
+
+/*
+ * Transmit a packet given as a scatter/gather list. The header is put
+ * in s->vec[0], the caller's iovecs are copied behind it and the whole
+ * datagram is sent to the configured peer with MSG_DONTWAIT.
+ * Returns the payload bytes sent, or a negative value on error.
+ */
+static ssize_t net_l2tpv3_receive_dgram_iov(NetClientState *nc,
+                                            const struct iovec *iov,
+                                            int iovcnt)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    struct msghdr message;
+    ssize_t sent;
+
+    /* s->vec has MAX_L2TPV3_IOVCNT slots and slot 0 holds the header */
+    if (iovcnt > MAX_L2TPV3_IOVCNT - 1) {
+        fprintf(stderr, "l2tpv3: iovec too long %d > %d\n",
+                iovcnt, MAX_L2TPV3_IOVCNT - 1);
+        return -1;
+    }
+    l2tpv3_form_header(s);
+    memcpy(s->vec + 1, iov, iovcnt * sizeof(struct iovec));
+    s->vec[0].iov_base = s->header_buf;
+    s->vec[0].iov_len = s->offset;
+
+    memset(&message, 0, sizeof(message));
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = iovcnt + 1;
+
+    sent = sendmsg(s->fd, &message, MSG_DONTWAIT);
+    return l2tpv3_tx_result(s, sent, iov_size(iov, iovcnt));
+}
+
+/*
+ * Transmit a packet held in one contiguous buffer: header in
+ * s->vec[0], payload in s->vec[1], sent to the configured peer.
+ * Returns the payload bytes sent, or a negative value on error.
+ */
+static ssize_t net_l2tpv3_receive_dgram(NetClientState *nc,
+                                        const uint8_t *buf,
+                                        size_t size)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    struct msghdr message;
+    ssize_t sent;
+
+    l2tpv3_form_header(s);
+    s->vec[0].iov_base = s->header_buf;
+    s->vec[0].iov_len = s->offset;
+    s->vec[1].iov_base = (void *) buf;
+    s->vec[1].iov_len = size;
+
+    memset(&message, 0, sizeof(message));
+    message.msg_name = s->dgram_dst;
+    message.msg_namelen = s->dst_size;
+    message.msg_iov = s->vec;
+    message.msg_iovlen = 2;
+
+    sent = sendmsg(s->fd, &message, 0);
+    return l2tpv3_tx_result(s, sent, size);
+}
+
+/*
+ * Check the cookie (if configured) and the session id of a received
+ * header against the expected RX values. buf is the start of the
+ * header buffer; in non-UDP IPv4 mode the IP header at its front is
+ * skipped first. Returns 0 on match, -1 on mismatch.
+ */
+static int l2tpv3_verify_header(NetL2TPV3State *s, uint8_t *buf)
+{
+    if (!s->udp && !s->ipv6) {
+        buf += sizeof(struct iphdr); /* fix for ipv4 raw */
+    }
+    if (s->cookie) {
+        if (s->cookie_is_64) {
+            if (ldq_be_p(buf + s->cookie_offset) != s->rx_cookie) {
+                fprintf(stderr, "unknown cookie id\n");
+                return -1;
+            }
+        } else {
+            /*
+             * Compare with the low 32 bits of the configured cookie,
+             * mirroring what l2tpv3_form_header() sends. Reading
+             * rx_cookie through a uint32_t pointer would select the
+             * high half on big-endian hosts.
+             */
+            if (ldl_be_p(buf + s->cookie_offset) != (uint32_t) s->rx_cookie) {
+                fprintf(stderr, "unknown cookie id\n");
+                return -1;
+            }
+        }
+    }
+    if (ldl_be_p(buf + s->session_offset) != s->rx_session) {
+        fprintf(stderr, "session mismatch\n");
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * fd read handler: fetch up to MAX_L2TPV3_MSGCNT datagrams with one
+ * recvmmsg() call and hand every one whose header verifies to
+ * qemu_send_packet().
+ */
+static void net_l2tpv3_send(void *opaque)
+{
+    NetL2TPV3State *s = opaque;
+    struct mmsghdr *msgvec = s->msgvec;
+    struct iovec *vec;
+    unsigned int offset = s->offset;
+    int i, count;
+
+    if (!s->udp && !s->ipv6) {
+        offset += sizeof(struct iphdr);
+    }
+    count = recvmmsg(s->fd, msgvec, MAX_L2TPV3_MSGCNT, MSG_DONTWAIT, NULL);
+    for (i = 0; i < count; i++, msgvec++) {
+        if (msgvec->msg_len == 0) {
+            continue;
+        }
+        vec = msgvec->msg_hdr.msg_iov;
+        vec[0].iov_len = offset; /* belt and braces - restore iov size */
+        /* a datagram shorter than the header must not be parsed */
+        if (msgvec->msg_len >= offset &&
+            l2tpv3_verify_header(s, vec[0].iov_base) == 0) {
+            qemu_send_packet(&s->nc, vec[1].iov_base,
+                             msgvec->msg_len - offset);
+        } else {
+            fprintf(stderr, "l2tpv3 header verification failed\n");
+        }
+        vec[1].iov_len = PAGE_SIZE; /* belt and braces - restore iov size */
+    }
+}
+
+/*
+ * Release a vector created by build_l2tpv3_vector(): every buffer,
+ * every iovec array and finally the mmsghdr array. NULL is accepted.
+ */
+static void destroy_vector(struct mmsghdr *msgvec, int count, int iovcount)
+{
+    struct iovec *iov;
+    int i, j;
+
+    if (!msgvec) {
+        return;
+    }
+    for (i = 0; i < count; i++) {
+        iov = msgvec[i].msg_hdr.msg_iov;
+        if (!iov) {
+            continue;
+        }
+        for (j = 0; j < iovcount; j++) {
+            /* slot 0 is g_malloc()ed, the others use qemu_memalign() */
+            if (j == 0) {
+                g_free(iov[j].iov_base);
+            } else {
+                qemu_vfree(iov[j].iov_base);
+            }
+        }
+        g_free(iov);
+    }
+    g_free(msgvec);
+}
+
+/*
+ * Allocate count receive messages, each with IOVSIZE iovecs: a header
+ * buffer of s->offset bytes (plus an IP header in non-UDP IPv4 mode)
+ * followed by a PAGE_SIZE-aligned payload buffer of PAGE_SIZE bytes.
+ */
+static struct mmsghdr *build_l2tpv3_vector(NetL2TPV3State *s, int count)
+{
+    struct mmsghdr *msgvec = g_malloc0(sizeof(struct mmsghdr) * count);
+    size_t header_len = s->offset;
+    struct iovec *iov;
+    int i;
+
+    if (!s->udp && !s->ipv6) {
+        header_len += sizeof(struct iphdr); /* fix for ipv4 raw */
+    }
+    for (i = 0; i < count; i++) {
+        iov = g_malloc(sizeof(struct iovec) * IOVSIZE);
+        iov[0].iov_base = g_malloc(header_len);
+        iov[0].iov_len = header_len;
+        iov[1].iov_base = qemu_memalign(PAGE_SIZE, PAGE_SIZE);
+        iov[1].iov_len = PAGE_SIZE;
+        msgvec[i].msg_hdr.msg_iov = iov;
+        msgvec[i].msg_hdr.msg_iovlen = IOVSIZE;
+    }
+    return msgvec;
+}
+
+/*
+ * NetClientInfo cleanup hook: unregister and close the socket and free
+ * everything net_init_l2tpv3() allocated. Also works on a partially
+ * initialised state (fd < 0, NULL buffers).
+ */
+static void net_l2tpv3_cleanup(NetClientState *nc)
+{
+    NetL2TPV3State *s = DO_UPCAST(NetL2TPV3State, nc, nc);
+
+    if (s->fd >= 0) {
+        qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+        close(s->fd);
+        s->fd = -1;
+    }
+    destroy_vector(s->msgvec, MAX_L2TPV3_MSGCNT, IOVSIZE);
+    g_free(s->vec);
+    g_free(s->header_buf);
+    g_free(s->dgram_dst);
+}
+
+static NetClientInfo net_l2tpv3_info = {
+    .type = NET_CLIENT_OPTIONS_KIND_L2TPV3,
+    .size = sizeof(NetL2TPV3State),
+    .receive = net_l2tpv3_receive_dgram,
+    .receive_iov = net_l2tpv3_receive_dgram_iov,
+    .cleanup = net_l2tpv3_cleanup,
+};
+
+/*
+ * Fill getaddrinfo() hints for a tunnel endpoint: address family from
+ * the ipv6 flag, a UDP datagram socket in UDP mode and a raw
+ * IPPROTO_L2TP socket otherwise.
+ */
+static void l2tpv3_fill_hints(const NetL2TPV3State *s, struct addrinfo *hints)
+{
+    memset(hints, 0, sizeof(*hints));
+    hints->ai_family = s->ipv6 ? AF_INET6 : AF_INET;
+    if (s->udp) {
+        hints->ai_socktype = SOCK_DGRAM;
+        hints->ai_protocol = 0;
+    } else {
+        /*
+         * The receive path expects a leading IPv4 header, i.e. raw
+         * socket behaviour, so ask for SOCK_RAW and not SOCK_DGRAM.
+         */
+        hints->ai_socktype = SOCK_RAW;
+        hints->ai_protocol = IPPROTO_L2TP;
+    }
+}
+
+/*
+ * Create an L2TPv3 network client from the parsed options: derive the
+ * header layout from the flags, bind a socket to src, resolve dst as
+ * the peer, allocate the TX/RX buffers and register the fd read
+ * handler. Returns 0 on success; on failure the partially constructed
+ * client is deleted again and -1 is returned.
+ */
+int net_init_l2tpv3(const NetClientOptions *opts,
+                    const char *name,
+                    NetClientState *peer)
+{
+    const NetdevL2TPv3Options *l2tpv3;
+    NetL2TPV3State *s;
+    NetClientState *nc;
+    struct addrinfo hints;
+    struct addrinfo *result = NULL;
+    char *srcport = NULL;
+    char *dstport = NULL;
+    size_t header_len;
+    int fd, gairet;
+
+    nc = qemu_new_net_client(&net_l2tpv3_info, peer, "l2tpv3", name);
+    s = DO_UPCAST(NetL2TPV3State, nc, nc);
+    s->fd = -1; /* lets net_l2tpv3_cleanup() cope with early failures */
+
+    assert(opts->kind == NET_CLIENT_OPTIONS_KIND_L2TPV3);
+    l2tpv3 = opts->l2tpv3;
+
+    /* Translate the optional boolean options into internal flags */
+    s->ipv6 = l2tpv3->has_ipv6 && l2tpv3->ipv6;
+    s->udp = l2tpv3->has_udp && l2tpv3->udp;
+    s->cookie_is_64 = l2tpv3->has_cookie64 && l2tpv3->cookie64;
+    s->nocounter = l2tpv3->has_counter && !l2tpv3->counter;
+
+    /* A cookie is only usable when given for both directions */
+    if (l2tpv3->has_rxcookie != l2tpv3->has_txcookie) {
+        fprintf(stderr, "l2tpv3_open : need both rxcookie and txcookie\n");
+        goto outerr;
+    }
+    s->cookie = l2tpv3->has_rxcookie;
+
+    if (s->udp) {
+        if (!(l2tpv3->has_srcport && l2tpv3->has_dstport)) {
+            fprintf(stderr,
+                    "l2tpv3_open : need both src and dst port for udp\n");
+            goto outerr;
+        }
+        srcport = l2tpv3->srcport;
+        dstport = l2tpv3->dstport;
+    }
+
+    /* Header layout: [udp word] session [cookie] [counter] [extra offset] */
+    s->offset = 4;
+    s->session_offset = 0;
+    s->cookie_offset = 4;
+    s->counter_offset = 4;
+    if (s->udp) {
+        s->offset += 4;
+        s->session_offset += 4;
+        s->cookie_offset += 4;
+        s->counter_offset += 4;
+    }
+    if (s->cookie) {
+        s->rx_cookie = l2tpv3->rxcookie;
+        s->tx_cookie = l2tpv3->txcookie;
+        s->offset += s->cookie_is_64 ? 8 : 4;
+        s->counter_offset += s->cookie_is_64 ? 8 : 4;
+    }
+    if (!s->nocounter) {
+        s->offset += 4;
+    }
+    if (l2tpv3->has_offset) {
+        s->offset += l2tpv3->offset; /* extra offset */
+    }
+
+    s->tx_session = l2tpv3->txsession;
+    s->rx_session = l2tpv3->has_rxsession ? l2tpv3->rxsession
+                                          : s->tx_session;
+
+    /* Create the socket and bind it to the local endpoint */
+    l2tpv3_fill_hints(s, &hints);
+    gairet = getaddrinfo(l2tpv3->src, srcport, &hints, &result);
+    if (gairet != 0 || result == NULL) {
+        fprintf(stderr, "l2tpv3_open : could not resolve src, error = %s\n",
+                gai_strerror(gairet));
+        goto outerr;
+    }
+    fd = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+    if (fd == -1) {
+        fprintf(stderr, "l2tpv3_open : socket creation failed, errno = %d\n",
+                errno);
+        freeaddrinfo(result);
+        goto outerr;
+    }
+    s->fd = fd; /* closed by net_l2tpv3_cleanup() from here on */
+    if (bind(fd, result->ai_addr, result->ai_addrlen)) {
+        fprintf(stderr, "l2tpv3_open : could not bind socket err=%i\n",
+                errno);
+        freeaddrinfo(result);
+        goto outerr;
+    }
+    freeaddrinfo(result);
+    result = NULL;
+
+    /* Resolve the peer */
+    l2tpv3_fill_hints(s, &hints);
+    gairet = getaddrinfo(l2tpv3->dst, dstport, &hints, &result);
+    if (gairet != 0 || result == NULL) {
+        fprintf(stderr, "l2tpv3_open : could not resolve dst, error = %s\n",
+                gai_strerror(gairet));
+        goto outerr;
+    }
+    s->dgram_dst = g_malloc0(sizeof(struct sockaddr_storage));
+    memcpy(s->dgram_dst, result->ai_addr, result->ai_addrlen);
+    s->dst_size = result->ai_addrlen;
+    freeaddrinfo(result);
+
+    /* Transmit and receive buffers */
+    header_len = s->offset;
+    if (!s->udp && !s->ipv6) {
+        header_len += sizeof(struct iphdr);
+    }
+    s->msgvec = build_l2tpv3_vector(s, MAX_L2TPV3_MSGCNT);
+    s->vec = g_malloc(sizeof(struct iovec) * MAX_L2TPV3_IOVCNT);
+    /* zeroed so the extra-offset padding never carries stale heap data */
+    s->header_buf = g_malloc0(header_len);
+    s->counter = 0;
+
+    qemu_set_nonblock(fd);
+    qemu_set_fd_handler(s->fd, net_l2tpv3_send, NULL, s);
+
+    snprintf(s->nc.info_str, sizeof(s->nc.info_str),
+             "l2tpv3: connected");
+    return 0;
+
+outerr:
+    qemu_del_net_client(nc);
+    return -1;
+}
diff --git a/net/net.c b/net/net.c
index 0a88e68..d03f64d 100644
--- a/net/net.c
+++ b/net/net.c
@@ -731,6 +731,9 @@ static int (* const net_client_init_fun[NET_CLIENT_OPTIONS_KIND_MAX])(
     [NET_CLIENT_OPTIONS_KIND_BRIDGE] = net_init_bridge,
 #endif
     [NET_CLIENT_OPTIONS_KIND_HUBPORT] = net_init_hubport,
+#ifdef CONFIG_LINUX
+    [NET_CLIENT_OPTIONS_KIND_L2TPV3] = net_init_l2tpv3,
+#endif
 };
 
 