If VLAN acceleration is used when the kernel receives a packet, the outermost VLAN tag will not be present in the packet data when it is received by netdev-linux. Instead, the tag will be present in the packet's auxdata.
This patch uses recvmsg() instead of recv() to read auxdata for each packet, and if the vlan_tci is set then it is added to the packet. Adding the vlan_tci to the packet involves copying most of the packet and may be rather expensive. There is ample scope to avoid this by passing the vlan_tci back to the caller separately from the packet itself or providing access headroom in the packet. This would most likely involve updating the netdev-class API. Signed-off-by: Simon Horman <ho...@verge.net.au> --- include/sparse/sys/socket.h | 1 + lib/netdev-linux.c | 86 +++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 81 insertions(+), 6 deletions(-) diff --git a/include/sparse/sys/socket.h b/include/sparse/sys/socket.h index 75ee43c..3212bf4 100644 --- a/include/sparse/sys/socket.h +++ b/include/sparse/sys/socket.h @@ -87,6 +87,7 @@ enum { }; enum { + SOL_PACKET, SOL_SOCKET }; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 3e0da48..5034907 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -20,11 +20,11 @@ #include <errno.h> #include <fcntl.h> -#include <arpa/inet.h> #include <inttypes.h> #include <linux/filter.h> #include <linux/gen_stats.h> #include <linux/if_ether.h> +#include <linux/if_packet.h> #include <linux/if_tun.h> #include <linux/types.h> #include <linux/ethtool.h> @@ -37,10 +37,8 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/socket.h> -#include <netpacket/packet.h> #include <net/if.h> #include <net/if_arp.h> -#include <net/if_packet.h> #include <net/route.h> #include <netinet/in.h> #include <poll.h> @@ -767,7 +765,7 @@ netdev_linux_rx_construct(struct netdev_rx *rx_) rx->fd = netdev->tap_fd; } else { struct sockaddr_ll sll; - int ifindex; + int ifindex, val; /* Result of tcpdump -dd inbound */ static const struct sock_filter filt[] = { { 0x28, 0, 0, 0xfffff004 }, /* ldh [0] */ @@ -787,6 +785,16 @@ netdev_linux_rx_construct(struct netdev_rx *rx_) goto error; } + val = 1; + error = setsockopt(rx->fd, SOL_PACKET, 
PACKET_AUXDATA, + &val, sizeof val); + if (error) { + error = errno; + VLOG_ERR("%s: failed to mark socket for auxdata (%s)", + netdev_get_name(netdev_), ovs_strerror(error)); + goto error; + } + /* Set non-blocking mode. */ error = set_nonblocking(rx->fd); if (error) { @@ -803,7 +811,7 @@ netdev_linux_rx_construct(struct netdev_rx *rx_) memset(&sll, 0, sizeof sll); sll.sll_family = AF_PACKET; sll.sll_ifindex = ifindex; - sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL); + sll.sll_protocol = htons(ETH_P_ALL); if (bind(rx->fd, (struct sockaddr *) &sll, sizeof sll) < 0) { error = errno; VLOG_ERR("%s: failed to bind raw socket (%s)", @@ -852,6 +860,72 @@ netdev_linux_rx_dealloc(struct netdev_rx *rx_) } static int +netdev_linux_rx_recv_sock(int fd, void *data, size_t size) +{ + ssize_t retval; + struct cmsghdr *cmsg; + union { + struct cmsghdr cmsg; + char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))]; + } cmsg_buf; + struct msghdr msg; + struct iovec iov; + + iov.iov_base = data; + iov.iov_len = size; + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &cmsg_buf; + msg.msg_controllen = sizeof cmsg_buf; + msg.msg_name = NULL; + + do { + retval = recvmsg(fd, &msg, MSG_TRUNC); + } while (retval < 0 && errno == EINTR); + + if (retval > size || retval < 0) { + return retval; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + struct tpacket_auxdata *aux; + + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) + || cmsg->cmsg_level != SOL_PACKET + || cmsg->cmsg_type != PACKET_AUXDATA) { + continue; + } + + aux = (struct tpacket_auxdata *)(void *)CMSG_DATA(cmsg); + if (aux->tp_vlan_tci) { + struct ofpbuf b; + + VLOG_WARN("vlan tci = %u", aux->tp_vlan_tci); + + if (retval + VLAN_HEADER_LEN > size) { + errno = EMSGSIZE; + return -errno; + } + if (size < ETH_ADDR_LEN) { + errno = EINVAL; + return -errno; + } + + /* XXX: This may be quite expensive! 
*/ + memmove((char *)data + VLAN_HEADER_LEN, data, retval); + + ofpbuf_use_stack(&b, data, size); + b.data = (char *)b.data + VLAN_HEADER_LEN; + b.size = retval; + eth_push_vlan(&b, htons(aux->tp_vlan_tci)); + retval = b.size; + } + } + + return retval; +} + +static int netdev_linux_rx_recv(struct netdev_rx *rx_, void *data, size_t size) { struct netdev_rx_linux *rx = netdev_rx_linux_cast(rx_); @@ -860,7 +934,7 @@ netdev_linux_rx_recv(struct netdev_rx *rx_, void *data, size_t size) do { retval = (rx->is_tap ? read(rx->fd, data, size) - : recv(rx->fd, data, size, MSG_TRUNC)); + : netdev_linux_rx_recv_sock(rx->fd, data, size)); } while (retval < 0 && errno == EINTR); if (retval >= 0) { -- 1.8.4 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev