Looks like a significant code simplification. Thanks, Ethan
On Fri, Aug 5, 2011 at 14:42, Ben Pfaff <[email protected]> wrote: > The Open vSwitch tree only has one user of the ability for a netdev to > receive packets from a network device. Thus, this commit simplifies the > common-case use of the netdev interface by replacing the "ethertype" option > from "struct netdev_options" by a new netdev_listen() call. > > The only user of netdev_listen() wants to receive all packets from a > network device, so this commit also removes the ability to restrict the > received packets to a particular protocol. (This ability was once used by > the Open vSwitch integrated DHCP client, but that code has been removed.) > > This commit also simplifies and improves the implementation of the code > in netdev-linux that started listening to a network device. Before, I had > not figured out how to avoid receiving all packets on all devices before > binding to a particular device, but I took a closer look at the kernel code > and figured it out. > > I've tested that the userspace datapath (dpif-netdev), the only user of > netdev_recv(), still works after this change. > --- > lib/dpif-netdev.c | 9 ++++- > lib/netdev-dummy.c | 24 +++++++++-- > lib/netdev-linux.c | 109 > ++++++++++++++++++++++++++----------------------- > lib/netdev-provider.h | 44 +++++++++++++------ > lib/netdev-vport.c | 4 +- > lib/netdev.c | 28 +++++++++---- > lib/netdev.h | 8 +--- > ofproto/ofproto.c | 1 - > utilities/ovs-dpctl.c | 2 - > vswitchd/bridge.c | 2 - > 10 files changed, 138 insertions(+), 93 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 14b9192..fa6b549 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -350,7 +350,6 @@ do_add_port(struct dp_netdev *dp, const char *devname, > const char *type, > /* Open and validate network device. */ > memset(&netdev_options, 0, sizeof netdev_options); > netdev_options.name = devname; > - netdev_options.ethertype = NETDEV_ETH_TYPE_ANY; > if (dp->class == &dpif_dummy_class) { > netdev_options.type = "dummy"; > } else if (internal) { > @@ -364,6 +363,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, > const char *type, > /* XXX reject loopback devices */ > /* XXX reject non-Ethernet devices */ > > + error = netdev_listen(netdev); > + if (error) { > + VLOG_ERR("%s: cannot receive packets on this network device (%s)", > + devname, strerror(errno)); > + netdev_close(netdev); > + return error; > + } > + > error = netdev_turn_flags_on(netdev, NETDEV_PROMISC, false); > if (error) { > netdev_close(netdev); > diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c > index 9cd06f1..15d97cf 100644 > --- a/lib/netdev-dummy.c > +++ b/lib/netdev-dummy.c > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2010 Nicira Networks. > + * Copyright (c) 2010, 2011 Nicira Networks. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -102,8 +102,7 @@ netdev_dummy_destroy(struct netdev_dev *netdev_dev_) > } > > static int > -netdev_dummy_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED, > - struct netdev **netdevp) > +netdev_dummy_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp) > { > struct netdev_dummy *netdev; > > @@ -122,6 +121,22 @@ netdev_dummy_close(struct netdev *netdev_) > } > > static int > +netdev_dummy_listen(struct netdev *netdev_ OVS_UNUSED) > +{ > + /* It's OK to listen on a dummy device. It just never receives any > + * packets. */ > + return 0; > +} > + > +static int > +netdev_dummy_recv(struct netdev *netdev_ OVS_UNUSED, > + void *buffer OVS_UNUSED, size_t size OVS_UNUSED) > +{ > + /* A dummy device never receives any packets. */ > + return -EAGAIN; > +} > + > +static int > netdev_dummy_set_etheraddr(struct netdev *netdev, > const uint8_t mac[ETH_ADDR_LEN]) > { > @@ -234,7 +249,8 @@ static const struct netdev_class dummy_class = { > > NULL, /* enumerate */ > > - NULL, /* recv */ > + netdev_dummy_listen, /* listen */ > + netdev_dummy_recv, /* recv */ > NULL, /* recv_wait */ > NULL, /* drain */ > > diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c > index 385c0b8..ac511f0 100644 > --- a/lib/netdev-linux.c > +++ b/lib/netdev-linux.c > @@ -639,8 +639,7 @@ netdev_linux_destroy(struct netdev_dev *netdev_dev_) > } > > static int > -netdev_linux_open(struct netdev_dev *netdev_dev_, int ethertype, > - struct netdev **netdevp) > +netdev_linux_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp) > { > struct netdev_dev_linux *netdev_dev = netdev_dev_linux_cast(netdev_dev_); > struct netdev_linux *netdev; > @@ -676,54 +675,6 @@ netdev_linux_open(struct netdev_dev *netdev_dev_, int > ethertype, > * directions appearing to be reversed. */ > netdev->fd = netdev_dev->state.tap.fd; > netdev_dev->state.tap.opened = true; > - } else if (ethertype != NETDEV_ETH_TYPE_NONE) { > - struct sockaddr_ll sll; > - int protocol; > - int ifindex; > - > - /* Create file descriptor. */ > - protocol = (ethertype == NETDEV_ETH_TYPE_ANY ? ETH_P_ALL > - : ethertype == NETDEV_ETH_TYPE_802_2 ? ETH_P_802_2 > - : ethertype); > - netdev->fd = socket(PF_PACKET, SOCK_RAW, > - (OVS_FORCE int) htons(protocol)); > - if (netdev->fd < 0) { > - error = errno; > - goto error; > - } > - > - /* Set non-blocking mode. */ > - error = set_nonblocking(netdev->fd); > - if (error) { > - goto error; > - } > - > - /* Get ethernet device index. */ > - error = get_ifindex(&netdev->netdev, &ifindex); > - if (error) { > - goto error; > - } > - > - /* Bind to specific ethernet device. */ > - memset(&sll, 0, sizeof sll); > - sll.sll_family = AF_PACKET; > - sll.sll_ifindex = ifindex; > - if (bind(netdev->fd, > - (struct sockaddr *) &sll, sizeof sll) < 0) { > - error = errno; > - VLOG_ERR("bind to %s failed: %s", > netdev_dev_get_name(netdev_dev_), > - strerror(error)); > - goto error; > - } > - > - /* Between the socket() and bind() calls above, the socket receives > all > - * packets of the requested type on all system interfaces. We do not > - * want to receive that data, but there is no way to avoid it. So we > - * must now drain out the receive queue. */ > - error = drain_rcvbuf(netdev->fd); > - if (error) { > - goto error; > - } > } > > *netdevp = &netdev->netdev; > @@ -769,12 +720,67 @@ netdev_linux_enumerate(struct sset *sset) > } > > static int > +netdev_linux_listen(struct netdev *netdev_) > +{ > + struct netdev_linux *netdev = netdev_linux_cast(netdev_); > + struct sockaddr_ll sll; > + int ifindex; > + int error; > + int fd; > + > + if (netdev->fd >= 0) { > + return 0; > + } > + > + /* Create file descriptor. */ > + fd = socket(PF_PACKET, SOCK_RAW, 0); > + if (fd < 0) { > + error = errno; > + VLOG_ERR("failed to create raw socket (%s)", strerror(error)); > + goto error; > + } > + > + /* Set non-blocking mode. */ > + error = set_nonblocking(fd); > + if (error) { > + goto error; > + } > + > + /* Get ethernet device index. */ > + error = get_ifindex(&netdev->netdev, &ifindex); > + if (error) { > + goto error; > + } > + > + /* Bind to specific ethernet device. */ > + memset(&sll, 0, sizeof sll); > + sll.sll_family = AF_PACKET; > + sll.sll_ifindex = ifindex; > + sll.sll_protocol = (OVS_FORCE unsigned short int) htons(ETH_P_ALL); > + if (bind(fd, (struct sockaddr *) &sll, sizeof sll) < 0) { > + error = errno; > + VLOG_ERR("%s: failed to bind raw socket (%s)", > + netdev_get_name(netdev_), strerror(error)); > + goto error; > + } > + > + netdev->fd = fd; > + return 0; > + > +error: > + if (fd >= 0) { > + close(fd); > + } > + return error; > +} > + > +static int > netdev_linux_recv(struct netdev *netdev_, void *data, size_t size) > { > struct netdev_linux *netdev = netdev_linux_cast(netdev_); > > if (netdev->fd < 0) { > - /* Device was opened with NETDEV_ETH_TYPE_NONE. */ > + /* Device is not listening. */ > return -EAGAIN; > } > > @@ -2254,6 +2260,7 @@ netdev_linux_change_seq(const struct netdev *netdev) > \ > ENUMERATE, \ > \ > + netdev_linux_listen, \ > netdev_linux_recv, \ > netdev_linux_recv_wait, \ > netdev_linux_drain, \ > diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h > index 7bb4eac..093a25d 100644 > --- a/lib/netdev-provider.h > +++ b/lib/netdev-provider.h > @@ -147,14 +147,8 @@ struct netdev_class { > const struct shash *args); > > /* Attempts to open a network device. On success, sets 'netdevp' > - * to the new network device. > - * > - * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order > - * to capture frames of that type received on the device. It may also be > - * one of the 'enum netdev_pseudo_ethertype' values to receive frames in > - * one of those categories. */ > - int (*open)(struct netdev_dev *netdev_dev, int ethertype, > - struct netdev **netdevp); > + * to the new network device. */ > + int (*open)(struct netdev_dev *netdev_dev, struct netdev **netdevp); > > /* Closes 'netdev'. */ > void (*close)(struct netdev *netdev); > @@ -168,17 +162,39 @@ struct netdev_class { > * If this netdev class does not support enumeration, this may be a null > * pointer. */ > int (*enumerate)(struct sset *all_names); > + > +/* ## ----------------- ## */ > +/* ## Receiving Packets ## */ > +/* ## ----------------- ## */ > + > +/* The network provider interface is mostly used for inspecting and > configuring > + * device "metadata", not for sending and receiving packets directly. It may > + * be impractical to implement these functions on some operating systems and > + * hardware. These functions may all be NULL in such cases. > + * > + * (However, the "dpif-netdev" implementation, which is the easiest way to > + * integrate Open vSwitch with a new operating system or hardware, does > require > + * the ability to receive packets.) */ > + > + /* Attempts to set up 'netdev' for receiving packets with ->recv(). > + * Returns 0 if successful, otherwise a positive errno value. Return > + * EOPNOTSUPP to indicate that the network device does not implement > packet > + * reception through this interface. This function may be set to null if > + * it would always return EOPNOTSUPP anyhow. (This will prevent the > + * network device from being usefully used by the netdev-based "userspace > + * datapath".)*/ > + int (*listen)(struct netdev *netdev); > > /* Attempts to receive a packet from 'netdev' into the 'size' bytes in > * 'buffer'. If successful, returns the number of bytes in the received > * packet, otherwise a negative errno value. Returns -EAGAIN immediately > * if no packet is ready to be received. > * > - * May return -EOPNOTSUPP if a network device does not implement packet > - * reception through this interface. This function may be set to null if > - * it would always return -EOPNOTSUPP anyhow. (This will prevent the > - * network device from being usefully used by the netdev-based "userspace > - * datapath".) */ > + * This function can only be expected to return a packet if ->listen() > has > + * been called successfully. > + * > + * May be null if not needed, such as for a network device that does not > + * implement packet reception through the 'recv' member function. */ > int (*recv)(struct netdev *netdev, void *buffer, size_t size); > > /* Registers with the poll loop to wake up from the next call to > @@ -194,7 +210,7 @@ struct netdev_class { > * May be null if not needed, such as for a network device that does not > * implement packet reception through the 'recv' member function. */ > int (*drain)(struct netdev *netdev); > - > + > /* Sends the 'size'-byte packet in 'buffer' on 'netdev'. Returns 0 if > * successful, otherwise a positive errno value. Returns EAGAIN without > * blocking if the packet cannot be queued immediately. Returns EMSGSIZE > diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c > index b9c1bfe..e3e480d 100644 > --- a/lib/netdev-vport.c > +++ b/lib/netdev-vport.c > @@ -258,8 +258,7 @@ netdev_vport_destroy(struct netdev_dev *netdev_dev_) > } > > static int > -netdev_vport_open(struct netdev_dev *netdev_dev_, int ethertype OVS_UNUSED, > - struct netdev **netdevp) > +netdev_vport_open(struct netdev_dev *netdev_dev_, struct netdev **netdevp) > { > struct netdev_vport *netdev; > > @@ -937,6 +936,7 @@ config_equal_ipsec(const struct shash *nd_args, const > struct shash *args) > \ > NULL, /* enumerate */ \ > \ > + NULL, /* listen */ \ > NULL, /* recv */ \ > NULL, /* recv_wait */ \ > NULL, /* drain */ \ > diff --git a/lib/netdev.c b/lib/netdev.c > index b8592c1..9954929 100644 > --- a/lib/netdev.c > +++ b/lib/netdev.c > @@ -204,12 +204,8 @@ update_device_args(struct netdev_dev *dev, const struct > shash *args) > * to the new network device, otherwise to null. > * > * If this is the first time the device has been opened, then create is called > - * before opening. The device is created using the given type and arguments. > - * > - * 'ethertype' may be a 16-bit Ethernet protocol value in host byte order to > - * capture frames of that type received on the device. It may also be one of > - * the 'enum netdev_pseudo_ethertype' values to receive frames in one of > those > - * categories. */ > + * before opening. The device is created using the given type and > + * arguments. */ > int > netdev_open(struct netdev_options *options, struct netdev **netdevp) > { > @@ -250,8 +246,7 @@ netdev_open(struct netdev_options *options, struct netdev > **netdevp) > return EINVAL; > } > > - error = netdev_dev->netdev_class->open(netdev_dev, options->ethertype, > - netdevp); > + error = netdev_dev->netdev_class->open(netdev_dev, netdevp); > > if (!error) { > netdev_dev->ref_cnt++; > @@ -271,7 +266,6 @@ netdev_open_default(const char *name, struct netdev > **netdevp) > > memset(&options, 0, sizeof options); > options.name = name; > - options.ethertype = NETDEV_ETH_TYPE_NONE; > > return netdev_open(&options, netdevp); > } > @@ -387,6 +381,19 @@ netdev_enumerate(struct sset *sset) > return error; > } > > +/* Attempts to set up 'netdev' for receiving packets with netdev_recv(). > + * Returns 0 if successful, otherwise a positive errno value. EOPNOTSUPP > + * indicates that the network device does not implement packet reception > + * through this interface. */ > +int > +netdev_listen(struct netdev *netdev) > +{ > + int (*listen)(struct netdev *); > + > + listen = netdev_get_dev(netdev)->netdev_class->listen; > + return listen ? (listen)(netdev) : EOPNOTSUPP; > +} > + > /* Attempts to receive a packet from 'netdev' into 'buffer', which the caller > * must have initialized with sufficient room for the packet. The space > * required to receive any packet is ETH_HEADER_LEN bytes, plus > VLAN_HEADER_LEN > @@ -394,6 +401,9 @@ netdev_enumerate(struct sset *sset) > * (Some devices do not allow for a VLAN header, in which case VLAN_HEADER_LEN > * need not be included.) > * > + * This function can only be expected to return a packet if ->listen() has > + * been called successfully. > + * > * If a packet is successfully retrieved, returns 0. In this case 'buffer' is > * guaranteed to contain at least ETH_TOTAL_MIN bytes. Otherwise, returns a > * positive errno value. Returns EAGAIN immediately if no packet is ready to > diff --git a/lib/netdev.h b/lib/netdev.h > index ba4a8e3..7e16bd3 100644 > --- a/lib/netdev.h > +++ b/lib/netdev.h > @@ -44,12 +44,6 @@ enum netdev_flags { > NETDEV_LOOPBACK = 0x0004 /* This is a loopback device. */ > }; > > -enum netdev_pseudo_ethertype { > - NETDEV_ETH_TYPE_NONE = -128, /* Receive no frames. */ > - NETDEV_ETH_TYPE_ANY, /* Receive all frames. */ > - NETDEV_ETH_TYPE_802_2 /* Receive all IEEE 802.2 frames. */ > -}; > - > /* Network device statistics. > * > * Values of unsupported statistics are set to all-1-bits (UINT64_MAX). */ > @@ -85,7 +79,6 @@ struct netdev_options { > const char *name; > const char *type; > const struct shash *args; > - int ethertype; > }; > > struct netdev; > @@ -117,6 +110,7 @@ int netdev_get_mtu(const struct netdev *, int *mtup); > int netdev_get_ifindex(const struct netdev *); > > /* Packet send and receive. */ > +int netdev_listen(struct netdev *); > int netdev_recv(struct netdev *, struct ofpbuf *); > void netdev_recv_wait(struct netdev *); > int netdev_drain(struct netdev *); > diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c > index f40f995..8054d05 100644 > --- a/ofproto/ofproto.c > +++ b/ofproto/ofproto.c > @@ -1115,7 +1115,6 @@ ofport_open(const struct ofproto_port *ofproto_port, > struct ofp_phy_port *opp) > memset(&netdev_options, 0, sizeof netdev_options); > netdev_options.name = ofproto_port->name; > netdev_options.type = ofproto_port->type; > - netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; > > error = netdev_open(&netdev_options, &netdev); > if (error) { > diff --git a/utilities/ovs-dpctl.c b/utilities/ovs-dpctl.c > index 7962c7a..1c31c71 100644 > --- a/utilities/ovs-dpctl.c > +++ b/utilities/ovs-dpctl.c > @@ -232,7 +232,6 @@ do_add_if(int argc OVS_UNUSED, char *argv[]) > options.name = strtok_r(argv[i], ",", &save_ptr); > options.type = "system"; > options.args = &args; > - options.ethertype = NETDEV_ETH_TYPE_NONE; > > if (!options.name) { > ovs_error(0, "%s is not a valid network device name", argv[i]); > @@ -384,7 +383,6 @@ show_dpif(struct dpif *dpif) > netdev_options.name = dpif_port.name; > netdev_options.type = dpif_port.type; > netdev_options.args = NULL; > - netdev_options.ethertype = NETDEV_ETH_TYPE_NONE; > error = netdev_open(&netdev_options, &netdev); > if (!error) { > const struct shash_node **nodes; > diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c > index 9b30791..f43902b 100644 > --- a/vswitchd/bridge.c > +++ b/vswitchd/bridge.c > @@ -859,7 +859,6 @@ bridge_add_ofproto_ports(struct bridge *br) > options.name = iface->name; > options.type = iface->type; > options.args = &args; > - options.ethertype = NETDEV_ETH_TYPE_NONE; > error = netdev_open(&options, &iface->netdev); > } else { > error = netdev_set_config(iface->netdev, &args); > @@ -925,7 +924,6 @@ bridge_add_ofproto_ports(struct bridge *br) > options.name = port->name; > options.type = "internal"; > options.args = NULL; > - options.ethertype = NETDEV_ETH_TYPE_NONE; > error = netdev_open(&options, &netdev); > if (!error) { > ofproto_port_add(br->ofproto, netdev, NULL); > -- > 1.7.4.4 > > _______________________________________________ > dev mailing list > [email protected] > http://openvswitch.org/mailman/listinfo/dev > _______________________________________________ dev mailing list [email protected] http://openvswitch.org/mailman/listinfo/dev
