[ovs-dev] [PATCH net-next 2/8] vxlan: Fix sparse warnings.
Fix following sparse warnings. drivers/net/vxlan.c:238:44: warning: incorrect type in argument 3 (different base types) drivers/net/vxlan.c:238:44:expected restricted __be32 [usertype] value drivers/net/vxlan.c:238:44:got unsigned int const [unsigned] [usertype] remote_vni drivers/net/vxlan.c:1735:18: warning: incorrect type in initializer (different signedness) drivers/net/vxlan.c:1735:18:expected int *id drivers/net/vxlan.c:1735:18:got unsigned int static [toplevel] * Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c |4 ++-- 1 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 173705f..91d04f9 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -78,7 +78,7 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -static unsigned int vxlan_net_id; +static int vxlan_net_id; /* per UDP socket information */ struct vxlan_sock { @@ -235,7 +235,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, nla_put_be16(skb, NDA_PORT, rdst->remote_port)) goto nla_put_failure; if (rdst->remote_vni != vxlan->default_dst.remote_vni && - nla_put_be32(skb, NDA_VNI, rdst->remote_vni)) + nla_put_u32(skb, NDA_VNI, rdst->remote_vni)) goto nla_put_failure; if (rdst->remote_ifindex && nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex)) -- 1.7.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH net-next 4/8] vxlan: Extend vxlan handlers for openvswitch.
Following patch adds data and priority fields to vxlan handlers and export vxlan handler api. vh->data is required to store private data per vxlan handler. vh->priority allows ovs to assign lower priority for ovs vxlan handler. So that vxlan device modules gets to look at vxlan packet before ovs. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c | 50 ++ include/net/vxlan.h | 26 ++ 2 files changed, 52 insertions(+), 24 deletions(-) create mode 100644 include/net/vxlan.h diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0830b71..6ca21c0 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -41,6 +41,7 @@ #include #include #include +#include #define VXLAN_VERSION "0.1" @@ -91,17 +92,6 @@ struct vxlan_sock { struct list_head handler_list; }; -struct vxlan_handler; -typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, __be32 key); - -struct vxlan_handler { - vxlan_rcv_t *rcv; - struct list_head node; - struct vxlan_sock *vs; - unsigned int refcnt; - struct rcu_head rcu; -}; - /* per-network namespace private data for this module */ struct vxlan_net { struct list_head vxlan_list; @@ -1533,10 +1523,11 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port) } -static struct vxlan_handler *vxlan_add_handler(struct net *net, - __be16 portno, vxlan_rcv_t *rcv) +struct vxlan_handler *vxlan_add_handler(struct net *net, + __be16 portno, vxlan_rcv_t *rcv, + void *data, int priority) { - struct vxlan_handler *vh; + struct vxlan_handler *vh, *new; struct vxlan_sock *vs; ASSERT_RTNL(); @@ -1554,25 +1545,35 @@ static struct vxlan_handler *vxlan_add_handler(struct net *net, } list_for_each_entry_rcu(vh, &vs->handler_list, node) { - if (vh->rcv == rcv) { + if (vh->rcv == rcv) { /* ignore prioroty here. */ vh->refcnt++; return vh; } } - vh = kzalloc(sizeof(*vh), GFP_KERNEL); - if (!vh) + new = kzalloc(sizeof(*new), GFP_KERNEL); + if (!new) return ERR_PTR(-ENOMEM); - vh->rcv = rcv; - vh->vs = vs; - vh->refcnt = 1; + new->rcv = rcv; + new->vs = vs; + new->refcnt = 1; + new->data = data; + new->priority = priority; + + list_for_each_entry_rcu(vh, &vs->handler_list, node) { + if (vh->priority > priority) { + list_add_tail_rcu(&new->node, &vh->node); + return vh; + } + } - list_add_rcu(&vh->node, &vs->handler_list); - return vh; + list_add_tail_rcu(&new->node, &vs->handler_list); + return new; } +EXPORT_SYMBOL_GPL(vxlan_add_handler); -static void vxlan_del_handler(struct vxlan_handler *vh) +void vxlan_del_handler(struct vxlan_handler *vh) { struct vxlan_sock *vs = vh->vs; @@ -1591,6 +1592,7 @@ static void vxlan_del_handler(struct vxlan_handler *vh) } } } +EXPORT_SYMBOL_GPL(vxlan_del_handler); static int vxlan_newlink(struct net *net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) @@ -1670,7 +1672,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, if (data[IFLA_VXLAN_PORT]) vxlan->dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]); - vxlan->vh = vxlan_add_handler(net, vxlan->dst_port, vxlan_rcv); + vxlan->vh = vxlan_add_handler(net, vxlan->dst_port, vxlan_rcv, NULL, 0); if (IS_ERR(vxlan->vh)) return PTR_ERR(vxlan->vh); diff --git a/include/net/vxlan.h b/include/net/vxlan.h new file mode 100644 index 000..015588b --- /dev/null +++ b/include/net/vxlan.h @@ -0,0 +1,26 @@ +#ifndef __NET_VXLAN_H +#define __NET_VXLAN_H 1 + +#include +#include +#include + +struct vxlan_handler; +typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, __be32 key); + +struct vxlan_handler { + vxlan_rcv_t *rcv; + struct list_head node; + void *data; + struct vxlan_sock *vs; + unsigned int refcnt; + struct rcu_head rcu; + int priority; +}; + +struct vxlan_handler *vxlan_add_handler(struct net *net, + __be16 portno, vxlan_rcv_t *rcv, + void *data, int priority); + +void vxlan_del_handler(struct vxlan_handler *vh); +#endif -- 1.7.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH net-next 3/8] vxlan: Allow multiple receive handlers.
Following patch adds basic multiple vxlan protocol handlers. This does not change any functionality. This is required for openvswitch vxlan support. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c | 212 +-- 1 files changed, 137 insertions(+), 75 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 91d04f9..0830b71 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -57,6 +57,7 @@ #define VXLAN_VID_MASK (VXLAN_N_VID - 1) /* IP header + UDP + VXLAN + Ethernet header */ #define VXLAN_HEADROOM (20 + 8 + 8 + 14) +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) #define VXLAN_FLAGS 0x0800 /* struct vxlanhdr.vx_flags required value. */ @@ -85,9 +86,20 @@ struct vxlan_sock { struct hlist_node hlist; struct rcu_head rcu; struct work_struct del_work; - unsigned int refcnt; struct socket *sock; struct hlist_head vni_list[VNI_HASH_SIZE]; + struct list_head handler_list; +}; + +struct vxlan_handler; +typedef int (vxlan_rcv_t)(struct vxlan_handler *vh, struct sk_buff *skb, __be32 key); + +struct vxlan_handler { + vxlan_rcv_t *rcv; + struct list_head node; + struct vxlan_sock *vs; + unsigned int refcnt; + struct rcu_head rcu; }; /* per-network namespace private data for this module */ @@ -120,7 +132,7 @@ struct vxlan_fdb { struct vxlan_dev { struct hlist_node hlist;/* vni hash table */ struct list_head next; /* vxlan's per namespace list */ - struct vxlan_sock *vn_sock; /* listening socket */ + struct vxlan_handler *vh; struct net_device *dev; struct vxlan_rdst default_dst; /* default destination */ __be32saddr;/* source address */ @@ -193,6 +205,17 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, __be16 port) return NULL; } +static struct vxlan_dev *vxlan_find_vni_port(struct vxlan_sock *vs, u32 id) +{ + struct vxlan_dev *vxlan; + + hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) { + if (vxlan->default_dst.remote_vni == id) + return vxlan; + } + + return NULL; +} /* Fill in neighbour message in skbuff. */ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, const struct vxlan_fdb *fdb, @@ -671,7 +694,7 @@ static int vxlan_join_group(struct net_device *dev) { struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); - struct sock *sk = vxlan->vn_sock->sock->sk; + struct sock *sk = vxlan->vh->vs->sock->sk; struct ip_mreqn mreq = { .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, .imr_ifindex= vxlan->default_dst.remote_ifindex, @@ -699,7 +722,7 @@ static int vxlan_leave_group(struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id); int err = 0; - struct sock *sk = vxlan->vn_sock->sock->sk; + struct sock *sk = vxlan->vh->vs->sock->sk; struct ip_mreqn mreq = { .imr_multiaddr.s_addr = vxlan->default_dst.remote_ip, .imr_ifindex= vxlan->default_dst.remote_ifindex, @@ -722,23 +745,17 @@ static int vxlan_leave_group(struct net_device *dev) /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { - struct iphdr *oip; + struct vxlan_handler *vh; + struct vxlan_sock *vs; struct vxlanhdr *vxh; - struct vxlan_dev *vxlan; - struct pcpu_tstats *stats; __be16 port; - __u32 vni; - int err; - - /* pop off outer UDP header */ - __skb_pull(skb, sizeof(struct udphdr)); /* Need Vxlan and inner Ethernet header to be present */ - if (!pskb_may_pull(skb, sizeof(struct vxlanhdr))) + if (!pskb_may_pull(skb, VXLAN_HLEN)) goto error; - /* Drop packets with reserved bits set */ - vxh = (struct vxlanhdr *) skb->data; + /* Return packets with reserved bits set */ + vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); if (vxh->vx_flags != htonl(VXLAN_FLAGS) || (vxh->vx_vni & htonl(0xff))) { netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", @@ -746,28 +763,45 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto error; } - __skb_pull(skb, sizeof(struct vxlanhdr)); + if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) + goto drop; - /* Is this VNI defined? */ - vni = ntohl(vxh->vx_vni) >> 8; port = inet_sk(sk)->inet_sport; - vxlan = vxlan_find_vni(sock
[ovs-dev] [PATCH net-next 0/8] openvswitch: Add vxlan tunneling support.
First two patches fixes vxlan issues. Next two patches extends vxlan so that openvswitch can share vxlan udp port with vxlan module. Rest of patches refactors vxlan data plane so that ovs can share that code with vxlan module. Last patch adds vxlan-vport to openvswitch. Pravin B Shelar (8): vxlan: Fix error handling while creating device. vxlan: Fix sparse wornings. vxlan: Allow multiple recieve handlers. vxlan: Extend vxlan handlers for openvswitch. vxlan: Factor out vxlan send api. vxlan: Improve vxlan headroom calculation. vxlan: Add tx-vlan offload support. openvswitch: Add vxlan tunneling support. drivers/net/vxlan.c | 337 +- include/net/vxlan.h | 33 include/uapi/linux/openvswitch.h | 11 ++ net/openvswitch/Kconfig |1 + net/openvswitch/Makefile |3 +- net/openvswitch/vport-vxlan.c| 220 + net/openvswitch/vport.c |3 + net/openvswitch/vport.h |1 + 8 files changed, 490 insertions(+), 119 deletions(-) create mode 100644 include/net/vxlan.h create mode 100644 net/openvswitch/vport-vxlan.c ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH net-next 6/8] vxlan: Improve vxlan headroom calculation.
Rather than having static headroom calculation, adjust headroom according to target device. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c | 13 + 1 files changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6ca2632..e53c947 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1040,6 +1040,7 @@ int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, { struct vxlanhdr *vxh; struct udphdr *uh; + int min_headroom; int err; if (!skb->encapsulation) { @@ -1047,6 +1048,14 @@ int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, skb->encapsulation = 1; } + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + VXLAN_HLEN + sizeof(struct iphdr); + + /* Need space for new headers (invalidates iph ptr) */ + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) + return err; + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; @@ -1099,10 +1108,6 @@ static netdev_tx_t __vxlan_xmit(struct sk_buff *skb, struct net_device *dev, goto drop; } - /* Need space for new headers (invalidates iph ptr) */ - if (skb_cow_head(skb, VXLAN_HEADROOM)) - goto drop; - old_iph = ip_hdr(skb); ttl = vxlan->ttl; -- 1.7.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH net-next 5/8] vxlan: Factor out vxlan send api.
Following patch allows more code sharing between vxlan and ovs-vxlan. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c | 97 ++- include/net/vxlan.h |7 2 files changed, 64 insertions(+), 40 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6ca21c0..6ca2632 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -961,11 +961,8 @@ static void vxlan_sock_put(struct sk_buff *skb) } /* On transmit, associate with the tunnel socket */ -static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb) +static void vxlan_set_owner(struct sock *sk, struct sk_buff *skb) { - struct vxlan_dev *vxlan = netdev_priv(dev); - struct sock *sk = vxlan->vh->vs->sock->sk; - skb_orphan(skb); sock_hold(sk); skb->sk = sk; @@ -977,9 +974,9 @@ static void vxlan_set_owner(struct net_device *dev, struct sk_buff *skb) * better and maybe available from hardware * secondary choice is to use jhash on the Ethernet header */ -static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb) +__be16 vxlan_src_port(__u16 port_min, __u16 port_max, struct sk_buff *skb) { - unsigned int range = (vxlan->port_max - vxlan->port_min) + 1; + unsigned int range = (port_max - port_min) + 1; u32 hash; hash = skb_get_rxhash(skb); @@ -987,8 +984,9 @@ static __be16 vxlan_src_port(const struct vxlan_dev *vxlan, struct sk_buff *skb) hash = jhash(skb->data, 2 * ETH_ALEN, (__force u32) skb->protocol); - return htonsu64) hash * range) >> 32) + vxlan->port_min); + return htonsu64) hash * range) >> 32) + port_min); } +EXPORT_SYMBOL_GPL(vxlan_src_port); static int handle_offloads(struct sk_buff *skb) { @@ -1035,14 +1033,51 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan, } } -static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, +int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, + struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, + __be16 src_port, __be16 dst_port, __be32 vni) +{ + struct vxlanhdr *vxh; + struct udphdr *uh; + int err; + + if (!skb->encapsulation) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); + vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_vni = vni; + + __skb_push(skb, sizeof(*uh)); + skb_reset_transport_header(skb); + uh = udp_hdr(skb); + + uh->dest = dst_port; + uh->source = src_port; + + uh->len = htons(skb->len); + uh->check = 0; + + vxlan_set_owner(vh->vs->sock->sk, skb); + + err = handle_offloads(skb); + if (err) + return err; + + return iptunnel_xmit(net, rt, skb, src, dst, +IPPROTO_UDP, tos, ttl, df); +} +EXPORT_SYMBOL_GPL(vxlan_xmit_skb); + +static netdev_tx_t __vxlan_xmit(struct sk_buff *skb, struct net_device *dev, struct vxlan_rdst *rdst, bool did_rsc) { struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; - struct vxlanhdr *vxh; - struct udphdr *uh; struct flowi4 fl4; __be32 dst; __be16 src_port, dst_port; @@ -1064,11 +1099,6 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } - if (!skb->encapsulation) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } - /* Need space for new headers (invalidates iph ptr) */ if (skb_cow_head(skb, VXLAN_HEADROOM)) goto drop; @@ -1083,7 +1113,7 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (tos == 1) tos = ip_tunnel_get_dsfield(old_iph, skb); - src_port = vxlan_src_port(vxlan, skb); + src_port = vxlan_src_port(vxlan->port_min, vxlan->port_max, skb); memset(&fl4, 0, sizeof(fl4)); fl4.flowi4_oif = rdst->remote_ifindex; @@ -1100,9 +1130,8 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, if (rt->dst.dev == dev) { netdev_dbg(dev, "circular route to %pI4\n", &dst); - ip_rt_put(rt); dev->stats.collisions++; - goto tx_error; + goto rt_tx_error; } /* Bypass encapsulation if the destination is local */ @@ -1117,30 +1146,16 @@ static netdev_tx_t vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vxlan_encap_bypass(skb, vxlan, dst_vxlan); return NET
[ovs-dev] [PATCH net-next 7/8] vxlan: Add tx-vlan offload support.
Following patch allows transmit side vlan offload for vxlan devices. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c | 15 ++- 1 files changed, 14 insertions(+), 1 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e53c947..a7feba4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -1049,13 +1050,23 @@ int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, } min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len - + VXLAN_HLEN + sizeof(struct iphdr); + + VXLAN_HLEN + sizeof(struct iphdr) + + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); if (unlikely(err)) return err; + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, +skb->vlan_proto, +vlan_tx_tag_get(skb { + return -ENOMEM; + } + skb->vlan_tci = 0; + } + vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_FLAGS); vxh->vx_vni = vni; @@ -1389,6 +1400,8 @@ static void vxlan_setup(struct net_device *dev) dev->features |= NETIF_F_RXCSUM; dev->features |= NETIF_F_GSO_SOFTWARE; + dev->vlan_features = dev->features; + dev->features |= NETIF_F_HW_VLAN_CTAG_TX; dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; dev->hw_features |= NETIF_F_GSO_SOFTWARE; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; -- 1.7.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH net-next 1/8] vxlan: Fix error handling while creating device.
Delete vxlan-sock list entry from list before freeing the memory. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c |4 +++- 1 files changed, 3 insertions(+), 1 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 284c6c0..173705f 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1606,9 +1606,11 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, err = register_netdevice(dev); if (err) { if (--vs->refcnt == 0) { + hlist_del_rcu(&vs->hlist); rtnl_unlock(); + sk_release_kernel(vs->sock->sk); - kfree(vs); + kfree_rcu(vs, rcu); rtnl_lock(); } return err; -- 1.7.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH net-next 8/8] openvswitch: Add vxlan tunneling support.
Following patch adds vxlan vport type for openvswitch using vxlan api. This patch adds vxlan dependency for openvswitch. Signed-off-by: Pravin B Shelar --- include/uapi/linux/openvswitch.h | 11 ++ net/openvswitch/Kconfig |1 + net/openvswitch/Makefile |3 +- net/openvswitch/vport-vxlan.c| 220 ++ net/openvswitch/vport.c |3 + net/openvswitch/vport.h |1 + 6 files changed, 238 insertions(+), 1 deletions(-) create mode 100644 net/openvswitch/vport-vxlan.c diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index c55efaa..52490b0 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -165,6 +165,7 @@ enum ovs_vport_type { OVS_VPORT_TYPE_NETDEV, /* network device */ OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */ OVS_VPORT_TYPE_GRE, /* GRE tunnel. */ + OVS_VPORT_TYPE_VXLAN,/* VXLAN tunnel. */ __OVS_VPORT_TYPE_MAX }; @@ -211,6 +212,16 @@ enum ovs_vport_attr { #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +/* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. + */ +enum { + OVS_TUNNEL_ATTR_UNSPEC, + OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ + __OVS_TUNNEL_ATTR_MAX +}; + +#define OVS_TUNNEL_ATTR_MAX (__OVS_TUNNEL_ATTR_MAX - 1) + /* Flows. */ #define OVS_FLOW_FAMILY "ovs_flow" diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 9fbc04a..201b40c 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -20,6 +20,7 @@ config OPENVSWITCH translate it into packet processing rules. Open vSwitch GRE support depends on CONFIG_NET_IPGRE_DEMUX. + Open vSwitch VXLAN support depends on CONFIG_VXLAN. See http://openvswitch.org for more information and userspace utilities. diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 01bddb2..33952c9 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -12,4 +12,5 @@ openvswitch-y := \ vport.o \ vport-gre.o \ vport-internal_dev.o \ - vport-netdev.o + vport-netdev.o \ + vport-vxlan.o diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c new file mode 100644 index 000..be22134 --- /dev/null +++ b/net/openvswitch/vport-vxlan.c @@ -0,0 +1,220 @@ +/* + * Copyright (c) 2013 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA + */ + +#if IS_ENABLED(CONFIG_VXLAN) + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "datapath.h" +#include "vport.h" + +#define OVS_VXLAN_RCV_PRIORITY 8 + +/** + * struct vxlan_port - Keeps track of open UDP ports + * @vh: vxlan_handler created for the port. + * @dst_port: vxlan UDP port no. + * @name: vport name. + */ +struct vxlan_port { + struct vxlan_handler *vh; + __be16 dst_port; + char name[IFNAMSIZ]; +}; + +static inline struct vxlan_port *vxlan_vport(const struct vport *vport) +{ + return vport_priv(vport); +} + +/* Called with rcu_read_lock and BH disabled. */ +static int vxlan_rcv(struct vxlan_handler *vh, struct sk_buff *skb, __be32 vx_vni) +{ + struct vport *vport = vh->data; + struct iphdr *iph; + struct ovs_key_ipv4_tunnel tun_key; + __be64 key; + + if (unlikely(!vport)) + return PACKET_REJECT; + + /* Save outer tunnel values */ + iph = ip_hdr(skb); + key = cpu_to_be64(ntohl(vx_vni) >> 8); + ovs_flow_tun_key_init(&tun_key, iph, key, TUNNEL_KEY); + + ovs_vport_receive(vport, skb, &tun_key); + return PACKET_RCVD; +} + +static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) +{ + struct vxlan_port *vxlan_port = vxlan_vport(vport); + + if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(vxlan_port->dst_port))) + return -EMSGSIZE; + return 0; +} + +static void vxlan_tnl_destroy(struct vport *vport) +{ + struct vxlan_port *vxlan_port = vxlan_vport(vport); + + rtnl_lock(); + vxlan_del_han
Re: [ovs-dev] [PATCH net-next 7/8] vxlan: Add tx-vlan offload support.
Hello. On 20-06-2013 11:26, Pravin B Shelar wrote: Following patch allows transmit side vlan offload for vxlan devices. Signed-off-by: Pravin B Shelar --- drivers/net/vxlan.c | 15 ++- 1 files changed, 14 insertions(+), 1 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index e53c947..a7feba4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c [...] @@ -1049,13 +1050,23 @@ int vxlan_xmit_skb(struct net *net, struct vxlan_handler *vh, [...] + if (vlan_tx_tag_present(skb)) { + if (unlikely(!__vlan_put_tag(skb, +skb->vlan_proto, +vlan_tx_tag_get(skb { + return -ENOMEM; + } You haven't run this patch thru scripts/checkpatch.pl, have you? It should have warned you about using {} on single statement branch. + skb->vlan_tci = 0; + } + WBR, Sergei ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH net-next 5/8] vxlan: Factor out vxlan send api.
> From: Pravin B Shelar > @@ -1205,13 +1222,13 @@ static netdev_tx_t vxlan_xmit(struct sk_buff > *skb, struct net_device *dev) > >skb1 = skb_clone(skb, GFP_ATOMIC); >if (skb1) { > - rc1 = vxlan_xmit_one(skb1, dev, rdst, did_rsc); > + rc1 = __vxlan_xmit(skb1, dev, rdst, did_rsc); > if (rc == NETDEV_TX_OK) > rc = rc1; >} > } > > - rc1 = vxlan_xmit_one(skb, dev, rdst0, did_rsc); > + rc1 = __vxlan_xmit(skb, dev, rdst0, did_rsc); > if (rc == NETDEV_TX_OK) >rc = rc1; > return rc; You're changing the name of the function that sends a packet to one destination because you think "__vxlan_xmit" is more descriptive than "vxlan_xmit_one" ? +-DLS ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] Bug#686518: openvswitch-switch: No network configuration with openvswitch
Hi, The bug still exists and the added "ifup -a" helps, but if the interface has not been set up before you need to set the link up manually : # ip link set eth1 up I added this to my /etc/network/interfaces to fix this at boot : auto eth1 iface eth1 inet manual pre-up ip link set eth1 up Regards, -- Jonathan Dupart ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC PATCH 2/8] ofpacts_check: Remove unnecessary flow copying.
Signed-off-by: Jarno Rajahalme --- lib/ofp-actions.c | 34 +++--- lib/ofp-actions.h |3 ++- 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index c9e000f..7a2851e 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -1141,9 +1141,9 @@ exit: return error; } +/* May modify flow->dl_type, caller must restore it. */ static enum ofperr -ofpact_check__(const struct ofpact *a, const struct flow *flow, int max_ports, - ovs_be16 *dl_type) +ofpact_check__(const struct ofpact *a, struct flow *flow, int max_ports) { const struct ofpact_enqueue *enqueue; @@ -1215,11 +1215,11 @@ ofpact_check__(const struct ofpact *a, const struct flow *flow, int max_ports, return 0; case OFPACT_PUSH_MPLS: -*dl_type = ofpact_get_PUSH_MPLS(a)->ethertype; +flow->dl_type = ofpact_get_PUSH_MPLS(a)->ethertype; return 0; case OFPACT_POP_MPLS: -*dl_type = ofpact_get_POP_MPLS(a)->ethertype; +flow->dl_type = ofpact_get_POP_MPLS(a)->ethertype; return 0; case OFPACT_SAMPLE: @@ -1240,33 +1240,21 @@ ofpact_check__(const struct ofpact *a, const struct flow *flow, int max_ports, * switch with no more than 'max_ports' ports. */ enum ofperr ofpacts_check(const struct ofpact ofpacts[], size_t ofpacts_len, - const struct flow *flow, int max_ports) + struct flow *flow, int max_ports) { const struct ofpact *a; ovs_be16 dl_type = flow->dl_type; -struct flow updated_flow; +enum ofperr error = 0; OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { -enum ofperr error; - -/* If the dl_type was changed by an action then its new value - * should be present in the flow passed to ofpact_check__(). */ -if (flow->dl_type != dl_type) { -/* Only copy flow at most once */ -if (flow != &updated_flow) { -updated_flow = *flow; -flow = &updated_flow; -} -updated_flow.dl_type = dl_type; -} - -error = ofpact_check__(a, flow, max_ports, &dl_type); +error = ofpact_check__(a, flow, max_ports); if (error) { -return error; +goto out; } } - -return 0; +out: +flow->dl_type = dl_type; /* Restore. */ +return error; } /* Verifies that the 'ofpacts_len' bytes of actions in 'ofpacts' are diff --git a/lib/ofp-actions.h b/lib/ofp-actions.h index 4e7fb5c..bba3927 100644 --- a/lib/ofp-actions.h +++ b/lib/ofp-actions.h @@ -492,8 +492,9 @@ enum ofperr ofpacts_pull_openflow11_instructions(struct ofpbuf *openflow, unsigned int instructions_len, uint8_t table_id, struct ofpbuf *ofpacts); +/* May temporarily modify flow */ enum ofperr ofpacts_check(const struct ofpact[], size_t ofpacts_len, - const struct flow *, int max_ports); + struct flow *, int max_ports); enum ofperr ofpacts_verify(const struct ofpact ofpacts[], size_t ofpacts_len); /* Converting ofpacts to OpenFlow. */ -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC PATCH 1/8] ofp-util: Remove prototype of non-existing function.
Signed-off-by: Jarno Rajahalme --- lib/ofp-util.h |2 -- 1 file changed, 2 deletions(-) diff --git a/lib/ofp-util.h b/lib/ofp-util.h index 1c8d6cd..431521c 100644 --- a/lib/ofp-util.h +++ b/lib/ofp-util.h @@ -676,8 +676,6 @@ void *ofputil_put_action(enum ofputil_action_code, struct ofpbuf *buf); #define OFP_ACTION_ALIGN 8 /* Alignment of ofp_actions. */ -enum ofperr validate_actions(const union ofp_action *, size_t n_actions, - const struct flow *, int max_ports); bool action_outputs_to_port(const union ofp_action *, ovs_be16 port); enum ofperr ofputil_pull_actions(struct ofpbuf *, unsigned int actions_len, -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC PATCH 4/8] ofproto: Implement OpenFlow 1.3 meter table.
Signed-off-by: Jarno Rajahalme --- lib/ofp-actions.c | 35 +++- lib/ofp-actions.h | 14 ++ lib/rconn.c| 14 +- ofproto/ofproto-dpif.c | 33 ofproto/ofproto-provider.h | 24 +++ ofproto/ofproto.c | 467 +--- ofproto/ofproto.h |4 + 7 files changed, 555 insertions(+), 36 deletions(-) diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index b487f5e..056ecd6 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -1237,10 +1237,16 @@ ofpact_check__(const struct ofpact *a, struct flow *flow, int max_ports) case OFPACT_CLEAR_ACTIONS: case OFPACT_WRITE_METADATA: -case OFPACT_METER: case OFPACT_GOTO_TABLE: return 0; +case OFPACT_METER: { +uint32_t mid = ofpact_get_METER(a)->meter_id; +if (mid == 0 || mid > OFPM13_MAX) { +return OFPERR_OFPBIC_EPERM; /* XXX: NEED OFPERR_OFPBIC_BAD_METER */ +} +return 0; +} default: NOT_REACHED(); } @@ -1268,6 +1274,33 @@ out: return error; } +/* Checks that the 'ofpacts_len' bytes of actions in 'ofpacts' are + * appropriate for a packet with the prerequisites satisfied by 'flow' using + * the context 'ctx'. */ +enum ofperr +ofpacts_check_ctx(struct ofpacts_check_ctx *ctx, + const struct ofpact ofpacts[], size_t ofpacts_len, + struct flow *flow) +{ +const struct ofpact *a; +ovs_be16 dl_type = flow->dl_type; +enum ofperr error = 0; + +OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { +if (ctx->funcs[a->type]) { +error = ctx->funcs[a->type](ctx, a, flow); +} else { +error = ofpact_check__(a, flow, ctx->max_ports); +} +if (error) { +goto out; +} +} +out: +flow->dl_type = dl_type; /* Restore. */ +return error; +} + /* Verifies that the 'ofpacts_len' bytes of actions in 'ofpacts' are * in the appropriate order as defined by the OpenFlow spec. */ enum ofperr diff --git a/lib/ofp-actions.h b/lib/ofp-actions.h index d445cf0..f039f88 100644 --- a/lib/ofp-actions.h +++ b/lib/ofp-actions.h @@ -504,6 +504,20 @@ enum ofperr ofpacts_pull_openflow11_instructions(struct ofpbuf *openflow, /* May temporarily modify flow */ enum ofperr ofpacts_check(const struct ofpact[], size_t ofpacts_len, struct flow *, int max_ports); + +struct ofpacts_check_ctx; +typedef enum ofperr (*ofpacts_checker_fp)(struct ofpacts_check_ctx *, + const struct ofpact *, + const struct flow *); +struct ofpacts_check_ctx { +void *owner; +ofpacts_checker_fp const *funcs; +int max_ports; +}; +enum ofperr ofpacts_check_ctx(struct ofpacts_check_ctx *, + const struct ofpact[], size_t ofpacts_len, + struct flow *); + enum ofperr ofpacts_verify(const struct ofpact ofpacts[], size_t ofpacts_len); /* Converting ofpacts to OpenFlow. */ diff --git a/lib/rconn.c b/lib/rconn.c index 4922a5c..ffd2738 100644 --- a/lib/rconn.c +++ b/lib/rconn.c @@ -1137,19 +1137,12 @@ is_admitted_msg(const struct ofpbuf *b) case OFPTYPE_QUEUE_GET_CONFIG_REPLY: case OFPTYPE_GET_ASYNC_REQUEST: case OFPTYPE_GET_ASYNC_REPLY: -case OFPTYPE_METER_MOD: case OFPTYPE_GROUP_REQUEST: case OFPTYPE_GROUP_REPLY: case OFPTYPE_GROUP_DESC_REQUEST: case OFPTYPE_GROUP_DESC_REPLY: case OFPTYPE_GROUP_FEATURES_REQUEST: case OFPTYPE_GROUP_FEATURES_REPLY: -case OFPTYPE_METER_REQUEST: -case OFPTYPE_METER_REPLY: -case OFPTYPE_METER_CONFIG_REQUEST: -case OFPTYPE_METER_CONFIG_REPLY: -case OFPTYPE_METER_FEATURES_REQUEST: -case OFPTYPE_METER_FEATURES_REPLY: case OFPTYPE_TABLE_FEATURES_REQUEST: case OFPTYPE_TABLE_FEATURES_REPLY: return false; @@ -1160,6 +1153,7 @@ is_admitted_msg(const struct ofpbuf *b) case OFPTYPE_PACKET_OUT: case OFPTYPE_FLOW_MOD: case OFPTYPE_PORT_MOD: +case OFPTYPE_METER_MOD: case OFPTYPE_BARRIER_REQUEST: case OFPTYPE_BARRIER_REPLY: case OFPTYPE_DESC_STATS_REQUEST: @@ -1176,6 +1170,12 @@ is_admitted_msg(const struct ofpbuf *b) case OFPTYPE_QUEUE_STATS_REPLY: case OFPTYPE_PORT_DESC_STATS_REQUEST: case OFPTYPE_PORT_DESC_STATS_REPLY: +case OFPTYPE_METER_REQUEST: +case OFPTYPE_METER_REPLY: +case OFPTYPE_METER_CONFIG_REQUEST: +case OFPTYPE_METER_CONFIG_REPLY: +case OFPTYPE_METER_FEATURES_REQUEST: +case OFPTYPE_METER_FEATURES_REPLY: case OFPTYPE_ROLE_REQUEST: case OFPTYPE_ROLE_REPLY: case OFPTYPE_SET_FLOW_FORMAT: diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 6f7fd0d..7afce20 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -6605,6 +6605,35 @@ vsp_add(struct ofport_dpif *port, uint16_t realdev_ofp_port, int vid)
[ovs-dev] [RFC PATCH 0/8] OpenFlow 1.3 meter implementation.
This series adds basic support for OpenFlow 1.3 meters. As meters need to act on possibly every packet seen by a datapath, the meters must be implemented in the datapath(s). This series includes a crude implementation on the userspace datapath, which is yet to be fully tested. Since a datapath can serve multiple bridges, the meter numbering spaces at the OpenFlow level and the datapath level need to be separated (this is similar to port numbering). In principle each ofproto-provider is responsible for the mapping between the two ID spaces, but the overhead of doing this is minimized by having space for the "provider meter ID" already at the main meter table in ofproto. I have tried to minimize the meter-related management overhead on the datapath(s), so there is no query interface to enumerate the datapath meters, but the upper lever is responsible of knowing what meters it has installed. Unit tests are included for parsers but the actual meter action support in dpif-netdev.c is yet to be tested. Also, documentation is yet to be written. I'm sending this now to gather feedback on the overall design before I invest more time in, e.g., kernel datapath meter support. Jarno Rajahalme (8): ofp-util: Remove prototype of non-existing function. ofpacts_check: Remove unnecessary flow copying. ofp-util: Support for OpenFlow 1.3 meters. ofproto: Implement OpenFlow 1.3 meter table. dpif: Meter framework. dpif-netdev: Simple token-bucket meter implementation. datapath: Meter placeholders. ovs-ofctl: Add meter support. datapath/actions.c | 16 +- datapath/datapath.c|5 + datapath/datapath.h|2 + include/linux/openvswitch.h|3 + include/openflow/openflow-1.3.h|2 +- include/openflow/openflow-common.h |1 + lib/dpif-linux.c | 40 +++ lib/dpif-netdev.c | 244 ++- lib/dpif-provider.h| 28 +++ lib/dpif.c | 87 +++ lib/dpif.h | 11 + lib/odp-execute.c | 20 +- lib/odp-execute.h |5 +- lib/odp-util.c | 14 ++ lib/ofp-actions.c | 95 ++-- lib/ofp-actions.h | 33 ++- lib/ofp-msgs.h |6 +- lib/ofp-parse.c| 216 - lib/ofp-parse.h|4 + lib/ofp-print.c| 266 +++- lib/ofp-util.c | 370 lib/ofp-util.h | 89 ++- lib/rconn.c| 14 +- ofproto/ofproto-dpif-xlate.c | 18 +- ofproto/ofproto-dpif.c | 56 + ofproto/ofproto-provider.h | 24 ++ ofproto/ofproto.c | 467 +--- ofproto/ofproto.h |4 + tests/ofp-print.at | 89 +++ utilities/ovs-ofctl.c | 88 +++ 30 files changed, 2231 insertions(+), 86 deletions(-) -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC PATCH 3/8] ofp-util: Support for OpenFlow 1.3 meters.
Signed-off-by: Jarno Rajahalme --- include/openflow/openflow-1.3.h|2 +- include/openflow/openflow-common.h |1 + lib/ofp-actions.c | 32 +++- lib/ofp-actions.h | 16 +- lib/ofp-msgs.h |6 +- lib/ofp-parse.c| 216 - lib/ofp-parse.h|4 + lib/ofp-print.c| 266 +- lib/ofp-util.c | 370 lib/ofp-util.h | 87 + ofproto/ofproto-dpif-xlate.c |4 + tests/ofp-print.at | 89 + 12 files changed, 1075 insertions(+), 18 deletions(-) diff --git a/include/openflow/openflow-1.3.h b/include/openflow/openflow-1.3.h index 231bba9..1071d3d 100644 --- a/include/openflow/openflow-1.3.h +++ b/include/openflow/openflow-1.3.h @@ -402,7 +402,7 @@ struct ofp13_meter_stats { ovs_be32 duration_sec; /* Time meter has been alive in seconds. */ ovs_be32 duration_nsec; /* Time meter has been alive in nanoseconds beyond duration_sec. */ -/* struct ofp13_meter_band_stats band_stats[0]; The band_stats length is +struct ofp13_meter_band_stats band_stats[0]; /* The band_stats length is inferred from the length field. */ }; OFP_ASSERT(sizeof(struct ofp13_meter_stats) == 40); diff --git a/include/openflow/openflow-common.h b/include/openflow/openflow-common.h index 3cc22c9..ee79136 100644 --- a/include/openflow/openflow-common.h +++ b/include/openflow/openflow-common.h @@ -352,6 +352,7 @@ enum ofp_flow_removed_reason { OFPRR_DELETE, /* Evicted by a DELETE flow mod. */ OFPRR_GROUP_DELETE, /* Group was removed. */ OFPRR_EVICTION, /* Switch eviction to free resources. */ +OFPRR_METER_DELETE, /* Meter was removed. */ }; /* What changed about the physical port */ diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c index 7a2851e..b487f5e 100644 --- a/lib/ofp-actions.c +++ b/lib/ofp-actions.c @@ -1086,6 +1086,16 @@ ofpacts_pull_openflow11_instructions(struct ofpbuf *openflow, goto exit; } +if (insts[OVSINST_OFPIT13_METER]) { +const struct ofp13_instruction_meter *oim; +struct ofpact_meter *om; + +oim = (const struct ofp13_instruction_meter *) +insts[OVSINST_OFPIT13_METER]; + +om = ofpact_put_METER(ofpacts); +om->meter_id = ntohl(oim->meter_id); +} if (insts[OVSINST_OFPIT11_APPLY_ACTIONS]) { const union ofp_action *actions; size_t n_actions; @@ -1227,6 +1237,7 @@ ofpact_check__(const struct ofpact *a, struct flow *flow, int max_ports) case OFPACT_CLEAR_ACTIONS: case OFPACT_WRITE_METADATA: +case OFPACT_METER: case OFPACT_GOTO_TABLE: return 0; @@ -1269,7 +1280,9 @@ ofpacts_verify(const struct ofpact ofpacts[], size_t ofpacts_len) OFPACT_FOR_EACH (a, ofpacts, ofpacts_len) { enum ovs_instruction_type next; -if (a->type == OFPACT_CLEAR_ACTIONS) { +if (a->type == OFPACT_METER) { +next = OVSINST_OFPIT13_METER; +} else if (a->type == OFPACT_CLEAR_ACTIONS) { next = OVSINST_OFPIT11_CLEAR_ACTIONS; } else if (a->type == OFPACT_WRITE_METADATA) { next = OVSINST_OFPIT11_WRITE_METADATA; @@ -1549,6 +1562,7 @@ ofpact_to_nxast(const struct ofpact *a, struct ofpbuf *out) case OFPACT_SET_L4_DST_PORT: case OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: +case OFPACT_METER: NOT_REACHED(); } } @@ -1641,6 +1655,7 @@ ofpact_to_openflow10(const struct ofpact *a, struct ofpbuf *out) case OFPACT_PUSH_VLAN: case OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: +case OFPACT_METER: /* XXX */ break; @@ -1813,6 +1828,7 @@ ofpact_to_openflow11(const struct ofpact *a, struct ofpbuf *out) case OFPACT_CLEAR_ACTIONS: case OFPACT_GOTO_TABLE: +case OFPACT_METER: NOT_REACHED(); case OFPACT_CONTROLLER: @@ -1893,6 +1909,13 @@ ofpacts_put_openflow11_instructions(const struct ofpact ofpacts[], oiwm = instruction_put_OFPIT11_WRITE_METADATA(openflow); oiwm->metadata = om->metadata; oiwm->metadata_mask = om->mask; +} else if (a->type == OFPACT_METER) { +const struct ofpact_meter *om; +struct ofp13_instruction_meter *oim; + +om = ofpact_get_METER(a); +oim = instruction_put_OFPIT13_METER(openflow); +oim->meter_id = htonl(om->meter_id); } else if (!ofpact_is_instruction(a)) { /* Apply-actions */ const size_t ofs = openflow->size; @@ -1962,6 +1985,7 @@ ofpact_outputs_to_port(const struct ofpact *ofpact, uint16_t port) case OFPACT_SAMPLE: case OFPACT_CLEA
[ovs-dev] [RFC PATCH 6/8] dpif-netdev: Simple token-bucket meter implementation.
Signed-off-by: Jarno Rajahalme --- lib/dpif-netdev.c | 273 + 1 file changed, 236 insertions(+), 37 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1f57917..32a0b7a 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -60,6 +60,9 @@ VLOG_DEFINE_THIS_MODULE(dpif_netdev); /* Configuration parameters. */ enum { MAX_PORTS = 256 }; /* Maximum number of ports. */ enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */ +enum { MAX_METERS = 65536 };/* Maximum number of meters. */ +enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */ +enum { MAX_DSCP_PREC_LEVEL = 0 }; /* Maximum increase in DSCP value. */ /* Enough headroom to add a vlan tag, plus an extra 2 bytes to allow IP * headers to be aligned on a 4-byte boundary. */ @@ -81,6 +84,34 @@ struct dp_netdev_queue { unsigned int head, tail; }; +/* Set of supported meter flags */ +#define DP_SUPPORTED_METER_FLAGS_MASK \ +(OFPMF13_STATS | OFPMF13_PKTPS | OFPMF13_KBPS | OFPMF13_BURST) + +/* Set of supported meter band types */ +#define DP_SUPPORTED_METER_BAND_TYPES \ +( 1 << OFPMBT13_DROP ) + +struct dp_meter_band { +struct ofputil_meter_band up; /* type, prec_level, pad, rate, burst_size */ +uint32_t bucket; +uint64_t packet_count; +uint64_t byte_count; +}; + +struct dp_meter { +uint16_t flags; +uint16_t n_bands; +uint64_t used; +uint64_t packet_count; +uint64_t byte_count; +struct dp_meter_band bands[]; +}; + +static bool +dp_netdev_action_meter(void *dp_, struct ofpbuf *packet, uint32_t meter_id, + long long int now); + /* Datapath based on the network device interface from netdev.h. */ struct dp_netdev { const struct dpif_class *class; @@ -100,6 +131,10 @@ struct dp_netdev { struct dp_netdev_port *ports[MAX_PORTS]; struct list port_list; unsigned int serial; + +/* Meters. */ +struct dp_meter *meters[MAX_METERS]; /* meter bands */ +uint32_t meter_free; /* Next free meter */ }; /* A port in a netdev-based datapath. */ @@ -1026,43 +1061,7 @@ dpif_netdev_recv_purge(struct dpif *dpif) struct dpif_netdev *dpif_netdev = dpif_netdev_cast(dpif); dp_netdev_purge_queues(dpif_netdev->dp); } - -/* Meters */ -static void -dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, - struct ofputil_meter_features *features) -{ -features->max_meters = 0; -features->band_types = 0; -features->capabilities = 0; -features->max_bands = 0; -features->max_color = 0; -} - -static int -dpif_netdev_meter_set(struct dpif *dpif OVS_UNUSED, - ofproto_meter_id *meter_id OVS_UNUSED, - const struct ofputil_meter_config *config OVS_UNUSED) -{ -return EFBIG; /* meter_id out of range */ -} -static int -dpif_netdev_meter_get(const struct dpif *dpif OVS_UNUSED, - ofproto_meter_id meter_id OVS_UNUSED, - struct ofputil_meter_stats *stats OVS_UNUSED) -{ -return EFBIG; /* meter_id out of range */ -} - -static int -dpif_netdev_meter_del(struct dpif *dpif OVS_UNUSED, - ofproto_meter_id meter_id OVS_UNUSED, - struct ofputil_meter_stats *stats OVS_UNUSED) -{ -return EFBIG; /* meter_id out of range */ -} - static void dp_netdev_flow_used(struct dp_netdev_flow *flow, const struct ofpbuf *packet) { @@ -1213,7 +1212,207 @@ dp_netdev_execute_actions(struct dp_netdev *dp, { odp_execute_actions(dp, packet, key, actions, actions_len, dp_netdev_output_port, dp_netdev_action_userspace, -NULL); +dp_netdev_action_meter); +} + +/* Meters */ +static void +dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, + struct ofputil_meter_features *features) +{ +features->max_meters = MAX_METERS; +features->band_types = DP_SUPPORTED_METER_BAND_TYPES >> 1; +features->capabilities = DP_SUPPORTED_METER_FLAGS_MASK; +features->max_bands = MAX_BANDS; +features->max_color = MAX_DSCP_PREC_LEVEL; +} + +static bool +dp_netdev_action_meter(void *dp_, struct ofpbuf *packet, uint32_t meter_id, + long long int now) +{ +struct dp_netdev *dp = dp_; +struct dp_meter *meter = dp->meters[meter_id]; +struct dp_meter_band *band; +int delta_t = (now - meter->used); /* msec */ +int divider; +uint32_t volume; +int i, band_exceeded_max = -1; +uint32_t band_exceeded_rate = 0; + +meter->used = now; +meter->packet_count += 1; +meter->byte_count += packet->size; + +if (meter->flags & OFPMF13_PKTPS) { +divider = 1000; /* msec * packets per second / 1000 = packets */ +volume = 1; /* Take one packet from the bucket */ +} else { +/* rate in kbps, bucket in
[ovs-dev] [RFC PATCH 8/8] ovs-ofctl: Add meter support.
Adds commands add-meter, mod-meter, del-meter, del-meters, dump-meter, dump-meters, meter-stats, and meter-features. Syntax is as follows: add-meter meter= (kbps|pktps) [burst] [stats] bands= type=(drop|dscp_remark) rate= [burst_size=] [prec_level=] mod-meter meter= (kbps|pktps) [burst] [stats] bands= type=(drop|dscp_remark) rate= [burst_size=] [prec_level=] del-meter meter=(|all) del-meters dump-meter meter=(|all) dump-meters meter-stats [meter=(|all)] meter-features Signed-off-by: Jarno Rajahalme --- utilities/ovs-ofctl.c | 88 + 1 file changed, 88 insertions(+) diff --git a/utilities/ovs-ofctl.c b/utilities/ovs-ofctl.c index 48f0fbf..7bafaa1 100644 --- a/utilities/ovs-ofctl.c +++ b/utilities/ovs-ofctl.c @@ -2175,6 +2175,86 @@ ofctl_diff_flows(int argc OVS_UNUSED, char *argv[]) exit(2); } } + +static void +ofctl_meter_mod__(const char *bridge, const char *str, int command, + bool verbose) +{ +struct ofputil_meter_mod mm; +struct vconn *vconn; + +if (str) { +parse_ofp_meter_mod_str(&mm, str, command, verbose); +} else { +mm.command = command; +mm.meter.meter_id = OFPM13_ALL; +} +open_vconn(bridge, &vconn); +transact_noreply(vconn, ofputil_encode_meter_mod(OFP13_VERSION, &mm)); +vconn_close(vconn); +} + + +static void +ofctl_meter_request__(const char *bridge, const char *str, + enum ofputil_meter_request_type type, bool verbose) +{ +struct ofputil_meter_mod mm; +struct vconn *vconn; + +if (str) { +parse_ofp_meter_mod_str(&mm, str, -1, verbose); +} else { +mm.meter.meter_id = OFPM13_ALL; +} + +open_vconn(bridge, &vconn); +transact_noreply(vconn, ofputil_encode_meter_request(OFP13_VERSION, + type, + mm.meter.meter_id)); +vconn_close(vconn); +} + + +static void +ofctl_add_meter(int argc OVS_UNUSED, char *argv[]) +{ +ofctl_meter_mod__(argv[1], argv[2], OFPMC13_ADD, true); +} + +static void +ofctl_mod_meter(int argc OVS_UNUSED, char *argv[]) +{ +ofctl_meter_mod__(argv[1], argv[2], OFPMC13_MODIFY, true); +} + +static void +ofctl_del_meters(int argc, char *argv[]) +{ +ofctl_meter_mod__(argv[1], argc > 2 ? argv[2] : NULL, + OFPMC13_DELETE, true); +} + +static void +ofctl_dump_meters(int argc, char *argv[]) +{ +ofctl_meter_request__(argv[1], argc > 2 ? argv[2] : NULL, + OFPUTIL_METER_CONFIG, true); +} + +static void +ofctl_meter_stats(int argc, char *argv[]) +{ +ofctl_meter_request__(argv[1], argc > 2 ? argv[2] : NULL, + OFPUTIL_METER_STATS, true); +} + +static void +ofctl_meter_features(int argc OVS_UNUSED, char *argv[]) +{ +ofctl_meter_request__(argv[1], NULL, OFPUTIL_METER_FEATURES, true); +} + /* Undocumented commands for unit testing. */ @@ -2865,6 +2945,14 @@ static const struct command all_commands[] = { { "del-flows", 1, 2, ofctl_del_flows }, { "replace-flows", 2, 2, ofctl_replace_flows }, { "diff-flows", 2, 2, ofctl_diff_flows }, +{ "add-meter", 2, 2, ofctl_add_meter }, +{ "mod-meter", 2, 2, ofctl_mod_meter }, +{ "del-meter", 2, 2, ofctl_del_meters }, +{ "del-meters", 1, 1, ofctl_del_meters }, +{ "dump-meter", 2, 2, ofctl_dump_meters }, +{ "dump-meters", 1, 1, ofctl_dump_meters }, +{ "meter-stats", 1, 2, ofctl_meter_stats }, +{ "meter-features", 1, 1, ofctl_meter_features }, { "packet-out", 4, INT_MAX, ofctl_packet_out }, { "dump-ports", 1, 2, ofctl_dump_ports }, { "dump-ports-desc", 1, 1, ofctl_dump_ports_desc }, -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [RFC PATCH 5/8] dpif: Meter framework.
Signed-off-by: Jarno Rajahalme --- include/linux/openvswitch.h |3 ++ lib/dpif-linux.c | 40 +++ lib/dpif-netdev.c| 43 - lib/dpif-provider.h | 28 ++ lib/dpif.c | 87 ++ lib/dpif.h | 11 ++ lib/odp-execute.c| 20 -- lib/odp-execute.h|5 ++- lib/odp-util.c | 14 +++ ofproto/ofproto-dpif-xlate.c | 16 +++- ofproto/ofproto-dpif.c | 45 -- 11 files changed, 293 insertions(+), 19 deletions(-) diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h index e890fd8..ddca0ac 100644 --- a/include/linux/openvswitch.h +++ b/include/linux/openvswitch.h @@ -516,6 +516,8 @@ struct ovs_action_push_vlan { * indicate the new packet contents This could potentially still be * %ETH_P_MPLS_* if the resulting MPLS label stack is not empty. If there * is no MPLS label stack, as determined by ethertype, no action is taken. + * @OVS_ACTION_ATTR_METER: Run packet through a meter, which may drop the + * packet, or execute further actions on the packet. * * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment @@ -532,6 +534,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_SAMPLE, /* Nested OVS_SAMPLE_ATTR_*. */ OVS_ACTION_ATTR_PUSH_MPLS,/* struct ovs_action_push_mpls. */ OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + OVS_ACTION_ATTR_METER,/* u32 meter number. */ __OVS_ACTION_ATTR_MAX }; diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index 1383b58..3af99ef 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -1386,6 +1386,42 @@ dpif_linux_recv_purge(struct dpif *dpif_) } } } + +/* Meters */ +static void +dpif_linux_meter_get_features(const struct dpif * dpif OVS_UNUSED, + struct ofputil_meter_features *features) +{ +features->max_meters = 0; +features->band_types = 0; +features->capabilities = 0; +features->max_bands = 0; +features->max_color = 0; +} + +static int +dpif_linux_meter_set(struct dpif *dpif OVS_UNUSED, + ofproto_meter_id *meter_id OVS_UNUSED, + const struct ofputil_meter_config *config OVS_UNUSED) +{ +return EFBIG; /* meter_id out of range */ +} + +static int +dpif_linux_meter_get(const struct dpif *dpif OVS_UNUSED, + ofproto_meter_id meter_id OVS_UNUSED, + struct ofputil_meter_stats *stats OVS_UNUSED) +{ +return EFBIG; /* meter_id out of range */ +} + +static int +dpif_linux_meter_del(struct dpif *dpif OVS_UNUSED, + ofproto_meter_id meter_id OVS_UNUSED, + struct ofputil_meter_stats *stats OVS_UNUSED) +{ +return EFBIG; /* meter_id out of range */ +} const struct dpif_class dpif_linux_class = { "system", @@ -1422,6 +1458,10 @@ const struct dpif_class dpif_linux_class = { dpif_linux_recv, dpif_linux_recv_wait, dpif_linux_recv_purge, +dpif_linux_meter_get_features, +dpif_linux_meter_set, +dpif_linux_meter_get, +dpif_linux_meter_del, }; static int diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 52aedb6..1f57917 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -1027,6 +1027,42 @@ dpif_netdev_recv_purge(struct dpif *dpif) dp_netdev_purge_queues(dpif_netdev->dp); } +/* Meters */ +static void +dpif_netdev_meter_get_features(const struct dpif * dpif OVS_UNUSED, + struct ofputil_meter_features *features) +{ +features->max_meters = 0; +features->band_types = 0; +features->capabilities = 0; +features->max_bands = 0; +features->max_color = 0; +} + +static int +dpif_netdev_meter_set(struct dpif *dpif OVS_UNUSED, + ofproto_meter_id *meter_id OVS_UNUSED, + const struct ofputil_meter_config *config OVS_UNUSED) +{ +return EFBIG; /* meter_id out of range */ +} + +static int +dpif_netdev_meter_get(const struct dpif *dpif OVS_UNUSED, + ofproto_meter_id meter_id OVS_UNUSED, + struct ofputil_meter_stats *stats OVS_UNUSED) +{ +return EFBIG; /* meter_id out of range */ +} + +static int +dpif_netdev_meter_del(struct dpif *dpif OVS_UNUSED, + ofproto_meter_id meter_id OVS_UNUSED, + struct ofputil_meter_stats *stats OVS_UNUSED) +{ +return EFBIG; /* meter_id out of range */ +} + static void dp_netdev_flow_used(struct dp_netdev_flow *flow, const struct ofpbuf *packet) { @@ -1176,7 +1212,8 @@ dp_netdev_execute_actions(struct dp_netdev *dp, size_t actions_len) { odp_execute_actions(dp, packet, key, actions, acti
[ovs-dev] [RFC PATCH 7/8] datapath: Meter placeholders.
Signed-off-by: Jarno Rajahalme --- datapath/actions.c | 16 +++- datapath/datapath.c |5 + datapath/datapath.h |2 ++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/datapath/actions.c b/datapath/actions.c index 09d0c3f..8c5e6d5 100644 --- a/datapath/actions.c +++ b/datapath/actions.c @@ -472,6 +472,15 @@ static int execute_set_action(struct sk_buff *skb, return err; } +/* May modify packet and execute other actions according to the meter + * configuration and state. + * Returns true if packet needs to be dropped. */ +static bool execute_meter_action(struct datapath *dp, struct sk_buff *skb, +uint32_t meter_id) +{ + return false; +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct nlattr *attr, int len, bool keep_skb) @@ -494,6 +503,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, } switch (nla_type(a)) { + case OVS_ACTION_ATTR_METER: + if (execute_meter_action(dp, skb, nla_get_u32(a))) + goto out; + break; + case OVS_ACTION_ATTR_OUTPUT: prev_port = nla_get_u32(a); break; @@ -526,7 +540,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return err; } } - +out: if (prev_port != -1) { if (keep_skb) skb = skb_clone(skb, GFP_ATOMIC); diff --git a/datapath/datapath.c b/datapath/datapath.c index 42af315..c90830a 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -780,6 +780,7 @@ static int validate_and_copy_actions(const struct nlattr *attr, /* Expected argument lengths, (u32)-1 for variable length. */ static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = { [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32), + [OVS_ACTION_ATTR_METER] = sizeof(u32), [OVS_ACTION_ATTR_USERSPACE] = (u32)-1, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, @@ -811,6 +812,10 @@ static int validate_and_copy_actions(const struct nlattr *attr, return -EINVAL; break; + case OVS_ACTION_ATTR_METER: + if (nla_get_u32(a) >= DP_MAX_METERS) + return -EINVAL; + break; case OVS_ACTION_ATTR_POP_VLAN: break; diff --git a/datapath/datapath.h b/datapath/datapath.h index ad59a3a..787f53c 100644 --- a/datapath/datapath.h +++ b/datapath/datapath.h @@ -36,6 +36,8 @@ #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 +#define DP_MAX_METERS 0 /* Not implemented yet */ + #define SAMPLE_ACTION_DEPTH 3 /** -- 1.7.10.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH net-next 1/8] vxlan: Fix error handling while creating device.
On Thu, 20 Jun 2013 00:26:25 -0700 Pravin B Shelar wrote: > Delete vxlan-sock list entry from list before freeing the memory. > > Signed-off-by: Pravin B Shelar > --- > drivers/net/vxlan.c |4 +++- > 1 files changed, 3 insertions(+), 1 deletions(-) > > diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c > index 284c6c0..173705f 100644 > --- a/drivers/net/vxlan.c > +++ b/drivers/net/vxlan.c > @@ -1606,9 +1606,11 @@ static int vxlan_newlink(struct net *net, struct > net_device *dev, > err = register_netdevice(dev); > if (err) { > if (--vs->refcnt == 0) { > + hlist_del_rcu(&vs->hlist); > rtnl_unlock(); > + > sk_release_kernel(vs->sock->sk); > - kfree(vs); > + kfree_rcu(vs, rcu); > rtnl_lock(); > } > return err; This will not be necessary with my next patch set, that code all changes. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v13 1/4] ofproto-dpif: Add 'force-miss-model' configuration
On Thu, Jun 20, 2013 at 09:47:38AM +0900, Simon Horman wrote: > This adds support for specifying flow miss handling behaviour at > runtime, through a new "other-config" option in the Open_vSwitch table. > This is an extension to flow-eviction-threshold. > > By default, the behaviour is the same as before. If force-miss-model is > set to 1, then flow miss handling will always result in the creation of > new facets and flow-eviction-threshold will be ignored. If > force-miss-model is set to 2, then flow miss handling will never result > in the creation of new facets (effectively the same as setting the > flow-eviction-threshold to 0, which is not currently configurable). In the database, please use some meaningful names for these modes, instead of integers. Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH net-next 0/8] openvswitch: Add vxlan tunneling support.
On Thu, 20 Jun 2013 00:26:15 -0700 Pravin B Shelar wrote: > First two patches fixes vxlan issues. Next two patches > extends vxlan so that openvswitch can share vxlan udp port > with vxlan module. Rest of patches refactors vxlan data > plane so that ovs can share that code with vxlan module. > Last patch adds vxlan-vport to openvswitch. > > Pravin B Shelar (8): > vxlan: Fix error handling while creating device. > vxlan: Fix sparse wornings. > vxlan: Allow multiple recieve handlers. > vxlan: Extend vxlan handlers for openvswitch. > vxlan: Factor out vxlan send api. > vxlan: Improve vxlan headroom calculation. > vxlan: Add tx-vlan offload support. > openvswitch: Add vxlan tunneling support. > > drivers/net/vxlan.c | 337 > +- > include/net/vxlan.h | 33 > include/uapi/linux/openvswitch.h | 11 ++ > net/openvswitch/Kconfig |1 + > net/openvswitch/Makefile |3 +- > net/openvswitch/vport-vxlan.c| 220 + > net/openvswitch/vport.c |3 + > net/openvswitch/vport.h |1 + > 8 files changed, 490 insertions(+), 119 deletions(-) > create mode 100644 include/net/vxlan.h > create mode 100644 net/openvswitch/vport-vxlan.c I will merge the first 7 into my vxlan repository and send a pull request. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] vswitchd: Update flow-eviction-threshold documentation
On Thu, Jun 20, 2013 at 09:25:30AM +0900, Joe Stringer wrote: > Patch 27a88d1373cbfcceac6d901bbf1c17051aa7845f caused the vswitchd > documentation and the code to digress. This brings them back in line. > > Signed-off-by: Joe Stringer Thanks, applied to master, branch-1.11, branch-1.10. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v13 4/4] Add MPLS recirculation tests
On Thu, Jun 20, 2013 at 09:47:41AM +0900, Simon Horman wrote: > From: Joe Stringer > > This patch introduces a python script to generate about 1500 tests for > permutations of mpls_push,mpls_pop,dec_mpls_ttl,dec_ttl where > recirculation occurs up to (and including) three times. > > Signed-off-by: Joe Stringer > Signed-off-by: Simon Horman That's a great idea. Thanks. I look forward to including this when the rest of the commits have run through the gauntlet. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/2] datapath: Mega flow implementation
On Wed, Jun 19, 2013 at 11:14 PM, Andy Zhou wrote: > This patch seems to work fine. Please feel free to push. Thanks, I applied the combined patch. > I have two concerns mainly on the style side: > > 1. In the same file flow.c, we are mixing the _deferred_ functions with the > style of passing 'deferred' as a parameter in the same file. The > correctness of caller in setting 'deferred' value does not look obviously > correct. I agree that this is not great from a consistency perspective. I changed the table destroy function to use the same style but left the actions alone since we only have a deferred destroy function. Other than conventions though, which part do you think is hard to understand? > 2. In function ovw_flow_free() > ... > ovs_sw_flow_mask_del_ref((struct sw_flow_mask __force *)flow->mask, > deferred); > ... > > The type cast to (flow->mask) does not look good to me. It also defeats some > useful lockdep checks. In this case, the semantics are basically that the caller needs to own the table, which may occur either by holding OVS lock or through the expiration of an RCU grace period. The latter is hard for this function to determine, so using the usual ovsl_dereference would provoke spurious warnings. We have something similar for actions in __flow_free(), which has the same type of issues. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH net-next 0/8] openvswitch: Add vxlan tunneling support.
On Thu, Jun 20, 2013 at 8:58 AM, Stephen Hemminger wrote: > On Thu, 20 Jun 2013 00:26:15 -0700 > Pravin B Shelar wrote: > >> First two patches fixes vxlan issues. Next two patches >> extends vxlan so that openvswitch can share vxlan udp port >> with vxlan module. Rest of patches refactors vxlan data >> plane so that ovs can share that code with vxlan module. >> Last patch adds vxlan-vport to openvswitch. >> >> Pravin B Shelar (8): >> vxlan: Fix error handling while creating device. >> vxlan: Fix sparse wornings. >> vxlan: Allow multiple recieve handlers. >> vxlan: Extend vxlan handlers for openvswitch. >> vxlan: Factor out vxlan send api. >> vxlan: Improve vxlan headroom calculation. >> vxlan: Add tx-vlan offload support. >> openvswitch: Add vxlan tunneling support. >> >> drivers/net/vxlan.c | 337 >> +- >> include/net/vxlan.h | 33 >> include/uapi/linux/openvswitch.h | 11 ++ >> net/openvswitch/Kconfig |1 + >> net/openvswitch/Makefile |3 +- >> net/openvswitch/vport-vxlan.c| 220 + >> net/openvswitch/vport.c |3 + >> net/openvswitch/vport.h |1 + >> 8 files changed, 490 insertions(+), 119 deletions(-) >> create mode 100644 include/net/vxlan.h >> create mode 100644 net/openvswitch/vport-vxlan.c > > I will merge the first 7 into my vxlan repository and send a pull request. Once you've done that, I'll resync my tree with net-next and take the last one. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 11/11] Makefile: Blacklist functions that threaded programs cannot use safely.
Looks good. Do we need to filter the warnings out from comments if they are not to be used in the code? Not specific to this patch, Why do we assume git is being used? it seems we can make the Makefile.am more portable otherwise. --andy ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] ovs-dpctl: Add mega flow support
On Tue, Jun 18, 2013 at 04:15:10PM -0700, Andy Zhou wrote: > Added support to allow mega flow specified and displayed. ovs-dpctl tool > is mainly used as debugging tool. > > This patch also implements the low level user space routines to send > and receive mega flow netlink messages. Those netlink suppor > routines are required for forthcoming user space mega flow patches. > > Added a unit test to test parsing and display of mega flows. > > Ethan contributed the ovs-dpctl mega flow output function. > > Co-authored-by: Ethan Jackson > Signed-off-by: Ethan Jackson > Signed-off-by: Andy Zhou > > --- > v1->v2 > Integrated Ethan's patch on ovs-dpctl mega flow output. > Add Ethan as a co-author for this patch. > > v2->v3 > Rebase to head to make review easier. > ovs-dpctl: Add mask input for tunnel configurations. > > v3->v4 > fix a typo > > v4->v5 > Add ovs-dpctl unit test cases for parsing mega flow. > Address Ben's review feedback. > > v5->v6 > Changes for ethtype > Address Ben's 2nd review feedback. I applied this to master. I couldn't resist making a few minor changes before I did: diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h index 84b5702..666c224 100644 --- a/lib/dpif-provider.h +++ b/lib/dpif-provider.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -279,15 +279,22 @@ struct dpif_class { * called again once it returns nonzero within a given iteration (but the * 'flow_dump_done' function will be called afterward). * - * On success, if 'key' and 'key_len' are nonnull then '*key' and - * '*key_len' must be set to Netlink attributes with types OVS_KEY_ATTR_* - * representing the dumped flow's key; if 'mask' and 'mask_len' are - * nonnull then '*mask' and '*mask_len' must be set to Netlink attributes - * with types of OVS_KEY_ATTR_* representing the dumped flow's mask. - * If 'actions' and 'actions_len' are nonnull then they should be set to - * Netlink attributes with types OVS_ACTION_ATTR_* representing the dumped - * flow's actions. If 'stats' is nonnull then it should be set to the - * dumped flow's statistics. + * On success: + * + * - If 'key' and 'key_len' are nonnull, then '*key' and '*key_len' + * must be set to Netlink attributes with types OVS_KEY_ATTR_* + * representing the dumped flow's key. + * + * - If 'mask' and 'mask_len' are nonnull then '*mask' and '*mask_len' + * must be set to Netlink attributes with types of OVS_KEY_ATTR_* + * representing the dumped flow's mask. + * + * - If 'actions' and 'actions_len' are nonnull then they should be set + * to Netlink attributes with types OVS_ACTION_ATTR_* representing + * the dumped flow's actions. + * + * - If 'stats' is nonnull then it should be set to the dumped flow's + * statistics. * * All of the returned data is owned by 'dpif', not by the caller, and the * caller must not modify or free it. 'dpif' must guarantee that it diff --git a/lib/packets.h b/lib/packets.h index 543a481..cc9ab3d 100644 --- a/lib/packets.h +++ b/lib/packets.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -278,11 +278,11 @@ vlan_tci_to_pcp(ovs_be16 vlan_tci) } /* Given the vlan_tci field from an 802.1Q header, in network byte order, - * returns the Canonical Format Indicator (CFI) in host byte order. */ + * returns the Canonical Format Indicator (CFI). */ static inline int vlan_tci_to_cfi(ovs_be16 vlan_tci) { -return (ntohs(vlan_tci) & VLAN_CFI) >> VLAN_CFI_SHIFT; +return (vlan_tci & htons(VLAN_CFI)) != 0; } #define VLAN_HEADER_LEN 4 Thanks! Ben ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/2] datapath: Mega flow implementation
On Thu, Jun 20, 2013 at 10:07 AM, Jesse Gross wrote: > On Wed, Jun 19, 2013 at 11:14 PM, Andy Zhou wrote: > > This patch seems to work fine. Please feel free to push. > > Thanks, I applied the combined patch. > > Sounds good. > > I have two concerns mainly on the style side: > > > > 1. In the same file flow.c, we are mixing the _deferred_ functions with > the > > style of passing 'deferred' as a parameter in the same file. The > > correctness of caller in setting 'deferred' value does not look obviously > > correct. > > I agree that this is not great from a consistency perspective. I > changed the table destroy function to use the same style but left the > actions alone since we only have a deferred destroy function. Other > than conventions though, which part do you think is hard to > understand? > Just how I would guess for some one new to look at this. It is not hard for me now that I have looked at this so much in the last few days. > > > 2. In function ovw_flow_free() > > ... > > ovs_sw_flow_mask_del_ref((struct sw_flow_mask __force *)flow->mask, > > deferred); > > ... > > > > The type cast to (flow->mask) does not look good to me. It also defeats > some > > useful lockdep checks. > > In this case, the semantics are basically that the caller needs to own > the table, which may occur either by holding OVS lock or through the > expiration of an RCU grace period. The latter is hard for this > function to determine, so using the usual ovsl_dereference would > provoke spurious warnings. We have something similar for actions in > __flow_free(), which has the same type of issues. > Yes, I agree it is technically necessary for the cast, just that we will be losing the some benefits of lockdep checks. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] tests: Check for init process name rather than pid
On Ubuntu Saucy based desktops, upstart runs with user sessions enabled which means that the init process under which a daemon might run is not always pid = 1. Check for a process with cmd name of 'init' instead. Signed-off-by: James Page --- tests/daemon-py.at | 10 ++ tests/daemon.at| 10 ++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/daemon-py.at b/tests/daemon-py.at index 39ab3cc..ff8592c 100644 --- a/tests/daemon-py.at +++ b/tests/daemon-py.at @@ -141,8 +141,9 @@ AT_CHECK([test -s daemon]) CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) -CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -158,8 +159,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) -CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) diff --git a/tests/daemon.at b/tests/daemon.at index a80cd3e..5ff4476 100644 --- a/tests/daemon.at +++ b/tests/daemon.at @@ -103,8 +103,9 @@ AT_CHECK([test -s daemon]) CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) -CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -120,8 +121,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) -CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) -- 1.8.3.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH V3] feature: Create specific types for ofp and odp port
On Wed, Jun 19, 2013 at 04:58:44PM -0700, Alex Wang wrote: > Currently datapath ports and openflow ports are both represented by unsigned > integers of various sizes. With implicit casts, etc. it is easy to mix them > up and use one where the other is expected. This commit creates two typedefs > ofp_port_t and odp_port_t. Both of these two types are marked by > "__attribute__((bitwise))" so that Sparse can be used to detect any misuse. > > Signed-off-by: Alex Wang I folded in some incrementals, below, and pushed this to master. The incrementals: * Better protect the *_PORT_C macros. I'd meant them only for integer literals (like UINT32_C, etc.) so I didn't originally put enough parentheses in, but I can see that it is tempting to use them in other places. * Replace one straggling UINT32_MAX by DOPP_NONE in a comment. * Fix dpif_netdev_port_add(), which checked choose_port()'s return value for >= 0, but an odp_port_t is always >= 0. * Fix some places where UINT16_MAX was used for a mask but it had been replaced by OFPP_MAX. It's important for a mask that it be all-1-bits, so UINT16_MAX is appropriate there. * Move code around and adjust style in a couple of places to suit me. Thank you very much! diff --git a/include/openvswitch/types.h b/include/openvswitch/types.h index 71cf468..d07c1e8 100644 --- a/include/openvswitch/types.h +++ b/include/openvswitch/types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010, 2011 Nicira, Inc. + * Copyright (c) 2010, 2011, 2013 Nicira, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -68,8 +68,8 @@ typedef uint32_t OVS_BITWISE odp_port_t; typedef uint32_t OVS_BITWISE ofp11_port_t; /* Macro functions that cast int types to ofp/odp/ofp11 types. */ -#define OFP_PORT_C(X) ((OVS_FORCE ofp_port_t) X) -#define ODP_PORT_C(X) ((OVS_FORCE odp_port_t) X) -#define OFP11_PORT_C(X) ((OVS_FORCE ofp11_port_t) X) +#define OFP_PORT_C(X) ((OVS_FORCE ofp_port_t) (X)) +#define ODP_PORT_C(X) ((OVS_FORCE odp_port_t) (X)) +#define OFP11_PORT_C(X) ((OVS_FORCE ofp11_port_t) (X)) #endif /* openvswitch/types.h */ diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c index bc5697d..fb87f81 100644 --- a/lib/dpif-linux.c +++ b/lib/dpif-linux.c @@ -653,7 +653,7 @@ dpif_linux_port_get_pid(const struct dpif *dpif_, odp_port_t port_no) if (dpif->epoll_fd < 0) { return 0; } else { -/* The UINT32_MAX "reserved" port number uses the "ovs-system"'s +/* The ODPP_NONE "reserved" port number uses the "ovs-system"'s * channel, since it is not heavily loaded. */ uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx; return nl_sock_pid(dpif->channels[idx].sock); diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 1ad454b..8e5e6df 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -219,8 +219,8 @@ create_dpif_netdev(struct dp_netdev *dp) } /* Choose an unused, non-zero port number and return it on success. - * Return 0 on failure. */ -static uint32_t + * Return ODPP_NONE on failure. */ +static odp_port_t choose_port(struct dp_netdev *dp, const char *name) { uint32_t port_no; @@ -243,7 +243,7 @@ choose_port(struct dp_netdev *dp, const char *name) port_no = start_no + strtol(p, NULL, 10); if (port_no > 0 && port_no < MAX_PORTS && !dp->ports[port_no]) { -return port_no; +return u32_to_odp(port_no); } break; } @@ -252,11 +252,11 @@ choose_port(struct dp_netdev *dp, const char *name) for (port_no = 1; port_no < MAX_PORTS; port_no++) { if (!dp->ports[port_no]) { -return port_no; +return u32_to_odp(port_no); } } -return 0; +return ODPP_NONE; } static int @@ -444,21 +444,23 @@ dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev, struct dp_netdev *dp = get_dp_netdev(dpif); char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; const char *dpif_port; -uint32_t port_no = odp_to_u32(*port_nop); +odp_port_t port_no; dpif_port = netdev_vport_get_dpif_port(netdev, namebuf, sizeof namebuf); -if (port_no != UINT32_MAX) { -if (port_no >= MAX_PORTS) { +if (*port_nop != ODPP_NONE) { +uint32_t port_idx = odp_to_u32(*port_nop); +if (port_idx >= MAX_PORTS) { return EFBIG; -} else if (dp->ports[port_no]) { +} else if (dp->ports[port_idx]) { return EBUSY; } +port_no = *port_nop; } else { port_no = choose_port(dp, dpif_port); } -if (port_no > 0) { -*port_nop = u32_to_odp(port_no); -return do_add_port(dp, dpif_port, netdev_get_type(netdev), *port_nop); +if (port_no != ODP
Re: [ovs-dev] [threads 10/11] sparse: Remove prototypes for thread-unsafe functions from headers.
Looks good. On Wed, Jun 19, 2013 at 1:17 PM, Ben Pfaff wrote: > This ensures that attempts to use them cause sparse to complain. > > Signed-off-by: Ben Pfaff > --- > include/sparse/math.h |5 + > include/sparse/netinet/in.h |3 +-- > 2 files changed, 2 insertions(+), 6 deletions(-) > > diff --git a/include/sparse/math.h b/include/sparse/math.h > index f94c527..c2d6156 100644 > --- a/include/sparse/math.h > +++ b/include/sparse/math.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2011 Nicira, Inc. > + * Copyright (c) 2011, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -108,9 +108,6 @@ double jn(int, double); > double ldexp(double, int); > float ldexpf(float, int); > long double ldexpl(long double, int); > -double lgamma(double); > -float lgammaf(float); > -long double lgammal(long double); > long long llrint(double); > long long llrintf(float); > long long llrintl(long double); > diff --git a/include/sparse/netinet/in.h b/include/sparse/netinet/in.h > index b3924c3..87d48d6 100644 > --- a/include/sparse/netinet/in.h > +++ b/include/sparse/netinet/in.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2011 Nicira, Inc. > + * Copyright (c) 2011, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -109,7 +109,6 @@ static inline uint16_t ntohs(ovs_be16 x) > > in_addr_t inet_addr(const char *); > int inet_aton (const char *, struct in_addr *); > -char *inet_ntoa(struct in_addr); > const char *inet_ntop(int, const void *, char *, socklen_t); > int inet_pton(int, const char *, void *); > > -- > 1.7.2.5 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/2] ovs-dpctl: Add mega flow support
Thanks for the updates and pushing it in. On Thu, Jun 20, 2013 at 10:34 AM, Ben Pfaff wrote: > On Tue, Jun 18, 2013 at 04:15:10PM -0700, Andy Zhou wrote: > > Added support to allow mega flow specified and displayed. ovs-dpctl tool > > is mainly used as debugging tool. > > > > This patch also implements the low level user space routines to send > > and receive mega flow netlink messages. Those netlink suppor > > routines are required for forthcoming user space mega flow patches. > > > > Added a unit test to test parsing and display of mega flows. > > > > Ethan contributed the ovs-dpctl mega flow output function. > > > > Co-authored-by: Ethan Jackson > > Signed-off-by: Ethan Jackson > > Signed-off-by: Andy Zhou > > > > --- > > v1->v2 > > Integrated Ethan's patch on ovs-dpctl mega flow output. > > Add Ethan as a co-author for this patch. > > > > v2->v3 > > Rebase to head to make review easier. > > ovs-dpctl: Add mask input for tunnel configurations. > > > > v3->v4 > > fix a typo > > > > v4->v5 > > Add ovs-dpctl unit test cases for parsing mega flow. > > Address Ben's review feedback. > > > > v5->v6 > > Changes for ethtype > > Address Ben's 2nd review feedback. > > I applied this to master. I couldn't resist making a few minor > changes before I did: > > diff --git a/lib/dpif-provider.h b/lib/dpif-provider.h > index 84b5702..666c224 100644 > --- a/lib/dpif-provider.h > +++ b/lib/dpif-provider.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2009, 2010, 2011, 2012 Nicira, Inc. > + * Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -279,15 +279,22 @@ struct dpif_class { > * called again once it returns nonzero within a given iteration (but > the > * 'flow_dump_done' function will be called afterward). > * > - * On success, if 'key' and 'key_len' are nonnull then '*key' and > - * '*key_len' must be set to Netlink attributes with types > OVS_KEY_ATTR_* > - * representing the dumped flow's key; if 'mask' and 'mask_len' are > - * nonnull then '*mask' and '*mask_len' must be set to Netlink > attributes > - * with types of OVS_KEY_ATTR_* representing the dumped flow's mask. > - * If 'actions' and 'actions_len' are nonnull then they should be set > to > - * Netlink attributes with types OVS_ACTION_ATTR_* representing the > dumped > - * flow's actions. If 'stats' is nonnull then it should be set to the > - * dumped flow's statistics. > + * On success: > + * > + * - If 'key' and 'key_len' are nonnull, then '*key' and > '*key_len' > + * must be set to Netlink attributes with types OVS_KEY_ATTR_* > + * representing the dumped flow's key. > + * > + * - If 'mask' and 'mask_len' are nonnull then '*mask' and > '*mask_len' > + * must be set to Netlink attributes with types of > OVS_KEY_ATTR_* > + * representing the dumped flow's mask. > + * > + * - If 'actions' and 'actions_len' are nonnull then they should > be set > + * to Netlink attributes with types OVS_ACTION_ATTR_* > representing > + * the dumped flow's actions. > + * > + * - If 'stats' is nonnull then it should be set to the dumped > flow's > + * statistics. > * > * All of the returned data is owned by 'dpif', not by the caller, > and the > * caller must not modify or free it. 'dpif' must guarantee that it > diff --git a/lib/packets.h b/lib/packets.h > index 543a481..cc9ab3d 100644 > --- a/lib/packets.h > +++ b/lib/packets.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2008, 2009, 2010, 2011, 2012 Nicira, Inc. > + * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -278,11 +278,11 @@ vlan_tci_to_pcp(ovs_be16 vlan_tci) > } > > /* Given the vlan_tci field from an 802.1Q header, in network byte order, > - * returns the Canonical Format Indicator (CFI) in host byte order. */ > + * returns the Canonical Format Indicator (CFI). */ > static inline int > vlan_tci_to_cfi(ovs_be16 vlan_tci) > { > -return (ntohs(vlan_tci) & VLAN_CFI) >> VLAN_CFI_SHIFT; > +return (vlan_tci & htons(VLAN_CFI)) != 0; > } > > #define VLAN_HEADER_LEN 4 > > Thanks! > > Ben > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] tests: Check for init process name rather than pid
Meh - I need to test this better; not working reliably everywhere. On 20/06/13 18:44, James Page wrote: On Ubuntu Saucy based desktops, upstart runs with user sessions enabled which means that the init process under which a daemon might run is not always pid = 1. Check for a process with cmd name of 'init' instead. Signed-off-by: James Page --- tests/daemon-py.at | 10 ++ tests/daemon.at| 10 ++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/daemon-py.at b/tests/daemon-py.at index 39ab3cc..ff8592c 100644 --- a/tests/daemon-py.at +++ b/tests/daemon-py.at @@ -141,8 +141,9 @@ AT_CHECK([test -s daemon]) CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) -CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -158,8 +159,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) -CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) diff --git a/tests/daemon.at b/tests/daemon.at index a80cd3e..5ff4476 100644 --- a/tests/daemon.at +++ b/tests/daemon.at @@ -103,8 +103,9 @@ AT_CHECK([test -s daemon]) CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) -CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -120,8 +121,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) -CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) -- James Page Ubuntu Core Developer Debian Maintainer james.p...@ubuntu.com ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] tests: Check for init process name rather than pid
I think it might be sufficient to check that the parent pid changed, rather than being picky about the new parent pid or the name of the process associated with that new parent pid. On Thu, Jun 20, 2013 at 07:11:51PM +0100, James Page wrote: > Meh - I need to test this better; not working reliably everywhere. > > On 20/06/13 18:44, James Page wrote: > >On Ubuntu Saucy based desktops, upstart runs with user sessions > >enabled which means that the init process under which a daemon > >might run is not always pid = 1. > > > >Check for a process with cmd name of 'init' instead. > > > >Signed-off-by: James Page > >--- > > tests/daemon-py.at | 10 ++ > > tests/daemon.at| 10 ++ > > 2 files changed, 12 insertions(+), 8 deletions(-) > > > >diff --git a/tests/daemon-py.at b/tests/daemon-py.at > >index 39ab3cc..ff8592c 100644 > >--- a/tests/daemon-py.at > >+++ b/tests/daemon-py.at > >@@ -141,8 +141,9 @@ AT_CHECK([test -s daemon]) > > CHECK([kill -0 `cat daemon`]) > > CHECK([ps -o ppid= -p `cat daemon` > monitor]) > > CHECK([kill -0 `cat monitor`]) > >-CHECK([ps -o ppid= -p `cat monitor` > init]) > >-CHECK([test `cat init` = 1]) > >+CHECK([ps -o ppid= -p `cat monitor` > init.pid]) > >+CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) > >+CHECK([test `cat init` = init]) > > # Kill the daemon process, making it look like a segfault, > > # and wait for a new daemon process to get spawned. > > CHECK([cp daemon olddaemon]) > >@@ -158,8 +159,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) > > CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) > > CHECK([diff monitor newmonitor]) > > CHECK([kill -0 `cat newmonitor`]) > >-CHECK([ps -o ppid= -p `cat newmonitor` > init]) > >-CHECK([test `cat init` = 1]) > >+CHECK([ps -o ppid= -p `cat monitor` > init.pid]) > >+CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) > >+CHECK([test `cat init` = init]) > > # Kill the daemon process with SIGTERM, and wait for the daemon > > # and the monitor processes to go away and the pidfile to get deleted. > > CHECK([kill `cat daemon`], [0], [], [ignore]) > >diff --git a/tests/daemon.at b/tests/daemon.at > >index a80cd3e..5ff4476 100644 > >--- a/tests/daemon.at > >+++ b/tests/daemon.at > >@@ -103,8 +103,9 @@ AT_CHECK([test -s daemon]) > > CHECK([kill -0 `cat daemon`]) > > CHECK([ps -o ppid= -p `cat daemon` > monitor]) > > CHECK([kill -0 `cat monitor`]) > >-CHECK([ps -o ppid= -p `cat monitor` > init]) > >-CHECK([test `cat init` = 1]) > >+CHECK([ps -o ppid= -p `cat monitor` > init.pid]) > >+CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) > >+CHECK([test `cat init` = init]) > > # Kill the daemon process, making it look like a segfault, > > # and wait for a new daemon process to get spawned. > > CHECK([cp daemon olddaemon]) > >@@ -120,8 +121,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) > > CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) > > CHECK([diff monitor newmonitor]) > > CHECK([kill -0 `cat newmonitor`]) > >-CHECK([ps -o ppid= -p `cat newmonitor` > init]) > >-CHECK([test `cat init` = 1]) > >+CHECK([ps -o ppid= -p `cat monitor` > init.pid]) > >+CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) > >+CHECK([test `cat init` = init]) > > # Kill the daemon process with SIGTERM, and wait for the daemon > > # and the monitor processes to go away and the pidfile to get deleted. > > CHECK([kill `cat daemon`], [0], [], [ignore]) > > > > > -- > James Page > Ubuntu Core Developer > Debian Maintainer > james.p...@ubuntu.com > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH net-next 8/8] openvswitch: Add vxlan tunneling support.
On Thu, Jun 20, 2013 at 12:27 AM, Pravin B Shelar wrote: > diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c > new file mode 100644 > index 000..be22134 > --- /dev/null > +++ b/net/openvswitch/vport-vxlan.c > @@ -0,0 +1,220 @@ > +/* > + * Copyright (c) 2013 Nicira, Inc. The version in the out of tree module has a Cisco copyright notice as well. I think it would be polite to keep it here as well. > +/* Called with rcu_read_lock and BH disabled. */ > +static int vxlan_rcv(struct vxlan_handler *vh, struct sk_buff *skb, __be32 > vx_vni) > +{ > + struct vport *vport = vh->data; > + struct iphdr *iph; > + struct ovs_key_ipv4_tunnel tun_key; > + __be64 key; > + > + if (unlikely(!vport)) > + return PACKET_REJECT; Can this ever be NULL if we've found out handler? > +static struct vport *vxlan_tnl_create(const struct vport_parms *parms) > +{ > + struct net *net = ovs_dp_get_net(parms->dp); > + struct nlattr *options = parms->options; > + struct vxlan_port *vxlan_port; > + struct vxlan_handler *vh; > + struct vport *vport; > + struct nlattr *a; > + int err; > + u16 dst_port; > + > + if (!options) { > + err = -EINVAL; > + goto error; > + } > + a = nla_find_nested(options, OVS_TUNNEL_ATTR_DST_PORT); > + if (a && nla_len(a) == sizeof(u16)) { > + dst_port = nla_get_u16(a); > + } else { > + /* Require destination port from userspace. */ > + err = -EINVAL; > + goto error; > + } > + > + vport = ovs_vport_alloc(sizeof(struct vxlan_port), > + &ovs_vxlan_vport_ops, parms); > + if (IS_ERR(vport)) > + return vport; > + > + rtnl_lock(); > + vh = vxlan_add_handler(net, htons(dst_port), vxlan_rcv, > + vport, OVS_VXLAN_RCV_PRIORITY); > + rtnl_unlock(); > + if (IS_ERR(vh)) { > + ovs_vport_free(vport); > + return (void *)vh; > + } > + > + vxlan_port = vxlan_vport(vport); > + strncpy(vxlan_port->name, parms->name, IFNAMSIZ); > + vxlan_port->vh = vh; > + vxlan_port->dst_port = htons(dst_port); I'm somewhat concerned about setting these after we call vxlan_add_handler(), which is the code that has the RCU barriers. We don't access them on receive (where is the only place it matters) but it makes the reasoning a little more complicated than necessary. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 10/11] sparse: Remove prototypes for thread-unsafe functions from headers.
Thanks, applied. On Thu, Jun 20, 2013 at 10:54:44AM -0700, Andy Zhou wrote: > Looks good. > > > On Wed, Jun 19, 2013 at 1:17 PM, Ben Pfaff wrote: > > > This ensures that attempts to use them cause sparse to complain. > > > > Signed-off-by: Ben Pfaff > > --- > > include/sparse/math.h |5 + > > include/sparse/netinet/in.h |3 +-- > > 2 files changed, 2 insertions(+), 6 deletions(-) > > > > diff --git a/include/sparse/math.h b/include/sparse/math.h > > index f94c527..c2d6156 100644 > > --- a/include/sparse/math.h > > +++ b/include/sparse/math.h > > @@ -1,5 +1,5 @@ > > /* > > - * Copyright (c) 2011 Nicira, Inc. > > + * Copyright (c) 2011, 2013 Nicira, Inc. > > * > > * Licensed under the Apache License, Version 2.0 (the "License"); > > * you may not use this file except in compliance with the License. > > @@ -108,9 +108,6 @@ double jn(int, double); > > double ldexp(double, int); > > float ldexpf(float, int); > > long double ldexpl(long double, int); > > -double lgamma(double); > > -float lgammaf(float); > > -long double lgammal(long double); > > long long llrint(double); > > long long llrintf(float); > > long long llrintl(long double); > > diff --git a/include/sparse/netinet/in.h b/include/sparse/netinet/in.h > > index b3924c3..87d48d6 100644 > > --- a/include/sparse/netinet/in.h > > +++ b/include/sparse/netinet/in.h > > @@ -1,5 +1,5 @@ > > /* > > - * Copyright (c) 2011 Nicira, Inc. > > + * Copyright (c) 2011, 2013 Nicira, Inc. > > * > > * Licensed under the Apache License, Version 2.0 (the "License"); > > * you may not use this file except in compliance with the License. > > @@ -109,7 +109,6 @@ static inline uint16_t ntohs(ovs_be16 x) > > > > in_addr_t inet_addr(const char *); > > int inet_aton (const char *, struct in_addr *); > > -char *inet_ntoa(struct in_addr); > > const char *inet_ntop(int, const void *, char *, socklen_t); > > int inet_pton(int, const char *, void *); > > > > -- > > 1.7.2.5 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 11/11] Makefile: Blacklist functions that threaded programs cannot use safely.
On Thu, Jun 20, 2013 at 10:27:01AM -0700, Andy Zhou wrote: > Looks good. Do we need to filter the warnings out from comments if they > are not to be used in the code? It's not easy to reliably filter out warnings from comments. It's usually easy to avoid mentioning these functions in comments. If it becomes a problem, we can do something else. > Not specific to this patch, Why do we assume git is being used? it seems we > can make the Makefile.am more portable otherwise. git is an easy way to get a complete list of source files. Do you know another good way? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 11/11] Makefile: Blacklist functions that threaded programs cannot use safely.
On Thu, Jun 20, 2013 at 11:23:38AM -0700, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 10:27:01AM -0700, Andy Zhou wrote: > > Looks good. Do we need to filter the warnings out from comments if they > > are not to be used in the code? > > It's not easy to reliably filter out warnings from comments. It's > usually easy to avoid mentioning these functions in comments. If it > becomes a problem, we can do something else. Oh, I see what you mean now. I guess I did try to filter out comments and forgot that I did that. I wrote this patch over a month ago, even though I only sent it out for the first time this week. When I remove the code that filters out comments, the following lines get flagged: ../lib/random.c:34: * We use this PRNG instead of libc's rand() because rand() varies in quality ../lib/util.c:649: * similar to the POSIX dirname() function but thread-safe. */ ../lib/util.c:667: * similar to the POSIX basename() function but thread-safe. */ ../ovsdb/ovsdb-client.c:704: * going to strtok() it and that's risky with literal "". */ See above for list of calls to functions that are blacklisted due to thread safety issues I think that these comments are valuable enough that I don't want to delete or mangle them. What do you think? Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v2.33] datapath: Add basic MPLS support to kernel
On Wed, Jun 19, 2013 at 5:56 PM, Simon Horman wrote: > On Tue, Jun 18, 2013 at 04:06:49PM +0900, Simon Horman wrote: >> Allow datapath to recognize and extract MPLS labels into flow keys >> and execute actions which push, pop, and set labels on packets. >> >> Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe >> Stringer. >> >> Cc: Ravi K >> Cc: Leo Alterman >> Cc: Isaku Yamahata >> Cc: Joe Stringer >> Signed-off-by: Simon Horman >> >> --- >> >> This patch depends on "gre: Restructure tunneling" which it aims >> to be compatible with. > > To clarify. The dependency relates to a conflict when applying this patch > which modifies datapath/linux/compat/gso.[ch], files that are > created by "gre: Restructure tunneling". I believe it would > be trivial to reverse the dependency so that this patch creates > those files and "gre: Restructure tunneling" applies on top of it > as the two patches add different functions to those files. > > As such I think it would be better to describe this patch > as compatible with "gre: Restructure tunneling" rather than > dependent on it. I agree that's the case. However, now that the OVS GRE code is upstream, I think I can take just a quick pass over the GRE patch to get it in and then we won't have any dependency issues. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 11/11] Makefile: Blacklist functions that threaded programs cannot use safely.
Make sense! On Thu, Jun 20, 2013 at 12:25 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 11:23:38AM -0700, Ben Pfaff wrote: > > On Thu, Jun 20, 2013 at 10:27:01AM -0700, Andy Zhou wrote: > > > Looks good. Do we need to filter the warnings out from comments if > they > > > are not to be used in the code? > > > > It's not easy to reliably filter out warnings from comments. It's > > usually easy to avoid mentioning these functions in comments. If it > > becomes a problem, we can do something else. > > Oh, I see what you mean now. I guess I did try to filter out comments and > forgot that I did that. I wrote this patch over a month ago, even > though I only sent it out for the first time this week. > > When I remove the code that filters out comments, the following lines > get flagged: > > ../lib/random.c:34: * We use this PRNG instead of libc's rand() > because rand() varies in quality > ../lib/util.c:649: * similar to the POSIX dirname() function but > thread-safe. */ > ../lib/util.c:667: * similar to the POSIX basename() function but > thread-safe. */ > ../ovsdb/ovsdb-client.c:704: * going to strtok() it and that's > risky with literal "". */ > See above for list of calls to functions that are > blacklisted due to thread safety issues > > I think that these comments are valuable enough that I don't want to > delete or mangle them. What do you think? > > Thanks, > > Ben. > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH V3] feature: Create specific types for ofp and odp port
Thanks very much, Ben, And sorry, I should have put you as co-author in the last commit. On Thu, Jun 20, 2013 at 10:53 AM, Ben Pfaff wrote: > On Wed, Jun 19, 2013 at 04:58:44PM -0700, Alex Wang wrote: > > Currently datapath ports and openflow ports are both represented by > unsigned > > integers of various sizes. With implicit casts, etc. it is easy to mix > them > > up and use one where the other is expected. This commit creates two > typedefs > > ofp_port_t and odp_port_t. Both of these two types are marked by > > "__attribute__((bitwise))" so that Sparse can be used to detect any > misuse. > > > > Signed-off-by: Alex Wang > > I folded in some incrementals, below, and pushed this to master. > > The incrementals: > > * Better protect the *_PORT_C macros. I'd meant them only for > integer literals (like UINT32_C, etc.) so I didn't > originally put enough parentheses in, but I can see that it > is tempting to use them in other places. > I really want to learn the difference here. Could you please explain more why the additional parentheses matter? > * Replace one straggling UINT32_MAX by DOPP_NONE in a comment. > Thanks, for this fix. > * Fix dpif_netdev_port_add(), which checked choose_port()'s > return value for >= 0, but an odp_port_t is always >= 0. > Thanks for this fix, the code looks more consistent. > * Fix some places where UINT16_MAX was used for a mask but it > had been replaced by OFPP_MAX. It's important for a mask > that it be all-1-bits, so UINT16_MAX is appropriate there. > Actually, I think you mean OFPP_NONE here, right? Is it to be more clear that we use "u16_to_ofp(UINT16_MAX)" for "masks.in_port.ofp_port" assignment ? > * Move code around and adjust style in a couple of places to > suit me. > > Thank you very much! > > diff --git a/include/openvswitch/types.h b/include/openvswitch/types.h > index 71cf468..d07c1e8 100644 > --- a/include/openvswitch/types.h > +++ b/include/openvswitch/types.h > @@ -1,5 +1,5 @@ > /* > - * Copyright (c) 2010, 2011 Nicira, Inc. > + * Copyright (c) 2010, 2011, 2013 Nicira, Inc. > * > * Licensed under the Apache License, Version 2.0 (the "License"); > * you may not use this file except in compliance with the License. > @@ -68,8 +68,8 @@ typedef uint32_t OVS_BITWISE odp_port_t; > typedef uint32_t OVS_BITWISE ofp11_port_t; > > /* Macro functions that cast int types to ofp/odp/ofp11 types. */ > -#define OFP_PORT_C(X) ((OVS_FORCE ofp_port_t) X) > -#define ODP_PORT_C(X) ((OVS_FORCE odp_port_t) X) > -#define OFP11_PORT_C(X) ((OVS_FORCE ofp11_port_t) X) > +#define OFP_PORT_C(X) ((OVS_FORCE ofp_port_t) (X)) > +#define ODP_PORT_C(X) ((OVS_FORCE odp_port_t) (X)) > +#define OFP11_PORT_C(X) ((OVS_FORCE ofp11_port_t) (X)) > > #endif /* openvswitch/types.h */ > diff --git a/lib/dpif-linux.c b/lib/dpif-linux.c > index bc5697d..fb87f81 100644 > --- a/lib/dpif-linux.c > +++ b/lib/dpif-linux.c > @@ -653,7 +653,7 @@ dpif_linux_port_get_pid(const struct dpif *dpif_, > odp_port_t port_no) > if (dpif->epoll_fd < 0) { > return 0; > } else { > -/* The UINT32_MAX "reserved" port number uses the "ovs-system"'s > +/* The ODPP_NONE "reserved" port number uses the "ovs-system"'s > * channel, since it is not heavily loaded. */ > uint32_t idx = port_idx >= dpif->uc_array_size ? 0 : port_idx; > return nl_sock_pid(dpif->channels[idx].sock); > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 1ad454b..8e5e6df 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -219,8 +219,8 @@ create_dpif_netdev(struct dp_netdev *dp) > } > > /* Choose an unused, non-zero port number and return it on success. > - * Return 0 on failure. */ > -static uint32_t > + * Return ODPP_NONE on failure. */ > +static odp_port_t > choose_port(struct dp_netdev *dp, const char *name) > { > uint32_t port_no; > @@ -243,7 +243,7 @@ choose_port(struct dp_netdev *dp, const char *name) > port_no = start_no + strtol(p, NULL, 10); > if (port_no > 0 && port_no < MAX_PORTS > && !dp->ports[port_no]) { > -return port_no; > +return u32_to_odp(port_no); > } > break; > } > @@ -252,11 +252,11 @@ choose_port(struct dp_netdev *dp, const char *name) > > for (port_no = 1; port_no < MAX_PORTS; port_no++) { > if (!dp->ports[port_no]) { > -return port_no; > +return u32_to_odp(port_no); > } > } > > -return 0; > +return ODPP_NONE; > } > > static int > @@ -444,21 +444,23 @@ dpif_netdev_port_add(struct dpif *dpif, struct > netdev *netdev, > struct dp_netdev *dp = get_dp_netdev(dpif); > char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; > const char *dpif_port; > -uint32_t port_no = odp_to_u32(*port_nop); > +
Re: [ovs-dev] [PATCH V3] feature: Create specific types for ofp and odp port
On Thu, Jun 20, 2013 at 12:35:57PM -0700, Alex Wang wrote: > And sorry, I should have put you as co-author in the last commit. I don't think it was warranted, you did most of the work. > On Thu, Jun 20, 2013 at 10:53 AM, Ben Pfaff wrote: > > > On Wed, Jun 19, 2013 at 04:58:44PM -0700, Alex Wang wrote: > > > Currently datapath ports and openflow ports are both represented by > > unsigned > > > integers of various sizes. With implicit casts, etc. it is easy to mix > > them > > > up and use one where the other is expected. This commit creates two > > typedefs > > > ofp_port_t and odp_port_t. Both of these two types are marked by > > > "__attribute__((bitwise))" so that Sparse can be used to detect any > > misuse. > > > > > > Signed-off-by: Alex Wang > > > > I folded in some incrementals, below, and pushed this to master. > > > > The incrementals: > > > > * Better protect the *_PORT_C macros. I'd meant them only for > > integer literals (like UINT32_C, etc.) so I didn't > > originally put enough parentheses in, but I can see that it > > is tempting to use them in other places. > > > > > I really want to learn the difference here. Could you please explain more > why the additional parentheses matter? Casts have higher precedence that most other operators, so (type)x and (type)(x) can be different. For example, (uint32_t)UINT64_MAX >> 1 has value UINT32_MAX/2, but (uint32_t)(UINT64_MAX >> 1) has value UINT32_MAX, if I haven't screwed anything up. > > * Fix some places where UINT16_MAX was used for a mask but it > > had been replaced by OFPP_MAX. It's important for a mask > > that it be all-1-bits, so UINT16_MAX is appropriate there. >> > Actually, I think you mean OFPP_NONE here, right? Oops, yes. > Is it to be more clear that we use "u16_to_ofp(UINT16_MAX)" for > "masks.in_port.ofp_port" assignment ? Yes, thanks for the correction. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 02/11] ovs-thread: Add per-thread data support.
On Wed, Jun 19, 2013 at 01:17:03PM -0700, Ben Pfaff wrote: > POSIX defines a portable pthread_key_t API for per-thread data. GCC and > C11 have two different forms of per-thread data that are generally faster > than the POSIX API, where they are available. This commit adds a > macro-based wrapper, DEFINE_PER_THREAD_DATA, that takes advantage of the > GCC extension where it is available and falls back to the POSIX API > otherwise. (I'm not aware of any compilers that implement the C11 feature, > so this commit doesn't try to use it.) Ed Maste pointed out off-list that clang on FreeBSD supports _Thread_local. Here's a revised version of the patch that supports both _Thread_local and __thread. I've also updated the "reviews" branch. --8<--cut here-->8-- From: Ben Pfaff Date: Thu, 20 Jun 2013 13:11:26 -0700 Subject: [PATCH] ovs-thread: Add per-thread data support. POSIX defines a portable pthread_key_t API for per-thread data. GCC and C11 have two different forms of per-thread data that are generally faster than the POSIX API, where they are available. This commit adds a macro-based wrapper, DEFINE_PER_THREAD_DATA, that takes advantage of these features where they are available and falls back to the POSIX API otherwise. The Clang compiler implements the C11 _Thread_local keyword. This commit also adds a convenience wrapper for the POSIX API, via the DEFINE_PER_THREAD_MALLOCED_DATA macro. Signed-off-by: Ben Pfaff --- configure.ac |1 + lib/ovs-thread.h | 194 + m4/openvswitch.m4 | 37 ++- 3 files changed, 231 insertions(+), 1 deletions(-) diff --git a/configure.ac b/configure.ac index a691963..6c610c1 100644 --- a/configure.ac +++ b/configure.ac @@ -80,6 +80,7 @@ OVS_CHECK_XENSERVER_VERSION OVS_CHECK_GROFF OVS_CHECK_GNU_MAKE OVS_CHECK_CACHE_TIME +OVS_CHECK__THREAD_LOCAL OVS_ENABLE_OPTION([-Wall]) OVS_ENABLE_OPTION([-Wno-sign-compare]) diff --git a/lib/ovs-thread.h b/lib/ovs-thread.h index cafeedf..d5bf048 100644 --- a/lib/ovs-thread.h +++ b/lib/ovs-thread.h @@ -85,5 +85,199 @@ void xpthread_cond_wait(pthread_cond_t *, pthread_mutex_t *mutex) void xpthread_key_create(pthread_key_t *, void (*destructor)(void *)); void xpthread_create(pthread_t *, pthread_attr_t *, void *(*)(void *), void *); + +/* Per-thread data. + * + * Multiple forms of per-thread data exist, each with its own pluses and + * minuses: + * + * - POSIX per-thread data via pthread_key_t is portable to any pthreads + * implementation, and allows a destructor function to be defined. It + * only (directly) supports per-thread pointers, which are always + * initialized to NULL. It requires once-only allocation of a + * pthread_key_t value. It is relatively slow. + * + * - The _Thread_local keyword newly defined in C11 works with any data + * type and initializer, and it is fast. _Thread_local does not require + * once-only initialization like pthread_key_t. C11 does not define what + * happens if one attempts to access a _Thread_local object from a thread + * other than the one to which that object belongs. There is no + * provision to call a user-specified destructor when a thread ends. + * + * - The __thread keyword is a GCC extension similar to _Thread_local but + * with a longer history. __thread is not portable to every GCC version + * or environment. __thread does not restrict the use of a thread-local + * object outside its own thread. + * + * Here's a handy summary: + * + * pthread_key_t _Thread_local__thread + * - - - + * portability high low medium + * speedlow highhigh + * supports destructors?yesno no + * needs key allocation?yesno no + * arbitrary initializer?no yes yes + * cross-thread access? yesno yes + */ + +/* DEFINE_PER_THREAD_DATA(TYPE, NAME, INITIALIZER). + * + * One should prefer to use POSIX per-thread data, via pthread_key_t, when its + * performance is acceptable, because of its portability (see the table above). + * This macro is an alternatives that takes advantage of _Thread_local (and + * __thread, via a config.h macro substitution), for its performance, when it + * is available, and falls back to POSIX per-thread data otherwise. + * + * Defines per-thread variable NAME with the given TYPE, initialized to + * INITIALIZER (which must be valid as an initializer for a variable with + * static lifetime). + * + * The public interface to the variable is: + * + *TYPE *NAME_get(void) + *TYPE *NAME_get__(void) + * + * Returns the address of this thread's in
Re: [ovs-dev] [RFC PATCH 1/8] ofp-util: Remove prototype of non-existing function.
On Thu, Jun 20, 2013 at 05:26:16PM +0300, Jarno Rajahalme wrote: > > Signed-off-by: Jarno Rajahalme Applied, thanks. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC PATCH 2/8] ofpacts_check: Remove unnecessary flow copying.
On Thu, Jun 20, 2013 at 05:26:17PM +0300, Jarno Rajahalme wrote: > > Signed-off-by: Jarno Rajahalme I have mixed feelings here. On one hand, it is nice to avoid a big copy in some cases (rare cases really). But on the other hand I am not fond of interfaces that temporarily modify data, even if they change it back later, and the pitfalls of such an approach are more of a problem as threading comes into the picture (and we are currently in the midst of threading OVS). What do you think? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH net-next 0/8] openvswitch: Add vxlan tunneling support.
On Thu, Jun 20, 2013 at 10:13 AM, Stephen Hemminger wrote: > On Thu, 20 Jun 2013 08:58:42 -0700 > Stephen Hemminger wrote: > >> On Thu, 20 Jun 2013 00:26:15 -0700 >> Pravin B Shelar wrote: >> >> > First two patches fixes vxlan issues. Next two patches >> > extends vxlan so that openvswitch can share vxlan udp port >> > with vxlan module. Rest of patches refactors vxlan data >> > plane so that ovs can share that code with vxlan module. >> > Last patch adds vxlan-vport to openvswitch. >> > >> > Pravin B Shelar (8): >> > vxlan: Fix error handling while creating device. >> > vxlan: Fix sparse wornings. >> > vxlan: Allow multiple recieve handlers. >> > vxlan: Extend vxlan handlers for openvswitch. >> > vxlan: Factor out vxlan send api. >> > vxlan: Improve vxlan headroom calculation. >> > vxlan: Add tx-vlan offload support. >> > openvswitch: Add vxlan tunneling support. >> > >> > drivers/net/vxlan.c | 337 >> > +- >> > include/net/vxlan.h | 33 >> > include/uapi/linux/openvswitch.h | 11 ++ >> > net/openvswitch/Kconfig |1 + >> > net/openvswitch/Makefile |3 +- >> > net/openvswitch/vport-vxlan.c| 220 + >> > net/openvswitch/vport.c |3 + >> > net/openvswitch/vport.h |1 + >> > 8 files changed, 490 insertions(+), 119 deletions(-) >> > create mode 100644 include/net/vxlan.h >> > create mode 100644 net/openvswitch/vport-vxlan.c >> >> I will merge the first 7 into my vxlan repository and send a pull request. > > The patch "vxlan: Allow multiple recieve handlers." and beyond conflict > with the changes to manage socket in work queue. > > Could you rebase them against > git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/vxlan-next.git > > The repo may not be available right now because kernel.org hasn't mirrored > it out yet. Can you sync your tree with net-next, it has ovs changes that are required for ovs-vxlan changes. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] tests: Check that parent process ID changes
On Ubuntu Saucy based desktops, upstart runs with user sessions enabled which means that the init process under which a daemon might run is not always pid = 1. Instead of checking for pid = 1, check to ensure that the parent pid of the monitor is not the pid of the shell that started it. Signed-off-by: James Page --- tests/daemon-py.at | 4 ++-- tests/daemon.at| 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/daemon-py.at b/tests/daemon-py.at index 39ab3cc..9bc7810 100644 --- a/tests/daemon-py.at +++ b/tests/daemon-py.at @@ -142,7 +142,7 @@ CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([test `cat init` != $$]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -159,7 +159,7 @@ CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([test `cat init` != $$]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) diff --git a/tests/daemon.at b/tests/daemon.at index a80cd3e..7c30e10 100644 --- a/tests/daemon.at +++ b/tests/daemon.at @@ -104,7 +104,7 @@ CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([test `cat init` != $$]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -121,7 +121,7 @@ CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([test `cat init` != $$]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) -- 1.8.3.1 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] tests: Check for init process name rather than pid
OK - resubmitted with suggested approach. Cheers James On 20/06/13 19:14, Ben Pfaff wrote: I think it might be sufficient to check that the parent pid changed, rather than being picky about the new parent pid or the name of the process associated with that new parent pid. On Thu, Jun 20, 2013 at 07:11:51PM +0100, James Page wrote: Meh - I need to test this better; not working reliably everywhere. On 20/06/13 18:44, James Page wrote: On Ubuntu Saucy based desktops, upstart runs with user sessions enabled which means that the init process under which a daemon might run is not always pid = 1. Check for a process with cmd name of 'init' instead. Signed-off-by: James Page --- tests/daemon-py.at | 10 ++ tests/daemon.at| 10 ++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/daemon-py.at b/tests/daemon-py.at index 39ab3cc..ff8592c 100644 --- a/tests/daemon-py.at +++ b/tests/daemon-py.at @@ -141,8 +141,9 @@ AT_CHECK([test -s daemon]) CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) -CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -158,8 +159,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) -CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) diff --git a/tests/daemon.at b/tests/daemon.at index a80cd3e..5ff4476 100644 --- a/tests/daemon.at +++ b/tests/daemon.at @@ -103,8 +103,9 @@ AT_CHECK([test -s daemon]) CHECK([kill -0 `cat daemon`]) CHECK([ps -o ppid= -p `cat daemon` > monitor]) CHECK([kill -0 `cat monitor`]) -CHECK([ps -o ppid= -p `cat monitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process, making it look like a segfault, # and wait for a new daemon process to get spawned. CHECK([cp daemon olddaemon]) @@ -120,8 +121,9 @@ CHECK([diff olddaemon newdaemon], [1], [ignore]) CHECK([ps -o ppid= -p `cat daemon` > newmonitor]) CHECK([diff monitor newmonitor]) CHECK([kill -0 `cat newmonitor`]) -CHECK([ps -o ppid= -p `cat newmonitor` > init]) -CHECK([test `cat init` = 1]) +CHECK([ps -o ppid= -p `cat monitor` > init.pid]) +CHECK([ps -o cmd= -p `cat init.pid` | awk '{ print $1 }' > init]) +CHECK([test `cat init` = init]) # Kill the daemon process with SIGTERM, and wait for the daemon # and the monitor processes to go away and the pidfile to get deleted. CHECK([kill `cat daemon`], [0], [], [ignore]) -- James Page Ubuntu Core Developer Debian Maintainer james.p...@ubuntu.com ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev -- James Page Ubuntu Core Developer Debian Maintainer james.p...@ubuntu.com ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH V3] feature: Create specific types for ofp and odp port
Thanks for the explanation, On Thu, Jun 20, 2013 at 12:47 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 12:35:57PM -0700, Alex Wang wrote: > > And sorry, I should have put you as co-author in the last commit. > > I don't think it was warranted, you did most of the work. > > > On Thu, Jun 20, 2013 at 10:53 AM, Ben Pfaff wrote: > > > > > On Wed, Jun 19, 2013 at 04:58:44PM -0700, Alex Wang wrote: > > > > Currently datapath ports and openflow ports are both represented by > > > unsigned > > > > integers of various sizes. With implicit casts, etc. it is easy to > mix > > > them > > > > up and use one where the other is expected. This commit creates two > > > typedefs > > > > ofp_port_t and odp_port_t. Both of these two types are marked by > > > > "__attribute__((bitwise))" so that Sparse can be used to detect any > > > misuse. > > > > > > > > Signed-off-by: Alex Wang > > > > > > I folded in some incrementals, below, and pushed this to master. > > > > > > The incrementals: > > > > > > * Better protect the *_PORT_C macros. I'd meant them only for > > > integer literals (like UINT32_C, etc.) so I didn't > > > originally put enough parentheses in, but I can see that it > > > is tempting to use them in other places. > > > > > > > > > I really want to learn the difference here. Could you please explain more > > why the additional parentheses matter? > > Casts have higher precedence that most other operators, so (type)x and > (type)(x) can be different. For example, (uint32_t)UINT64_MAX >> 1 has > value UINT32_MAX/2, but (uint32_t)(UINT64_MAX >> 1) has value > UINT32_MAX, if I haven't screwed anything up. This makes sense. Thanks, > > > * Fix some places where UINT16_MAX was used for a mask but it > > > had been replaced by OFPP_MAX. It's important for a mask > > > that it be all-1-bits, so UINT16_MAX is appropriate there. > >> > > Actually, I think you mean OFPP_NONE here, right? > > Oops, yes. > > > Is it to be more clear that we use "u16_to_ofp(UINT16_MAX)" for > > "masks.in_port.ofp_port" assignment ? > > Yes, thanks for the correction. > Then, there is one leftover in tests/test-classifier.c, I'll patch it quickly, ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] test-classifier.c: Use UINT16_MAX instead of OFPP_NONE in mask assignment
It is more comprehensible to use UINT16_MAX in mask assignment than OFPP_NONE. Signed-off-by: Alex Wang --- tests/test-classifier.c |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test-classifier.c b/tests/test-classifier.c index cd081a5..601aaf8 100644 --- a/tests/test-classifier.c +++ b/tests/test-classifier.c @@ -548,7 +548,7 @@ make_rule(int wc_fields, unsigned int priority, int value_pat) } else if (f_idx == CLS_F_IDX_DL_TYPE) { match.wc.masks.dl_type = htons(UINT16_MAX); } else if (f_idx == CLS_F_IDX_IN_PORT) { -match.wc.masks.in_port.ofp_port = OFPP_NONE; +match.wc.masks.in_port.ofp_port = u16_to_ofp(UINT16_MAX); } else { NOT_REACHED(); } -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] FAQ: Describe how to use ovs-ofctl with OpenFlow 1.1+.
Signed-off-by: Ben Pfaff --- FAQ |5 + 1 files changed, 5 insertions(+), 0 deletions(-) diff --git a/FAQ b/FAQ index 1f0f841..7df4e90 100644 --- a/FAQ +++ b/FAQ @@ -940,6 +940,11 @@ A: Open vSwitch 1.9 and earlier support only OpenFlow 1.0 (plus ovs-vsctl set bridge br0 protocols=OpenFlow10,OpenFlow12,OpenFlow13 + Use the -O option to enable support for later versions of OpenFlow + in ovs-ofctl. For example: + + ovs-ofctl -O OpenFlow13 dump-flows br0 + Support for OpenFlow 1.1 is incomplete enough that it cannot yet be enabled, even experimentally. -- 1.7.2.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
If a flow cannot be installed in the datapath, we should notice this and not treat it as installed. This becomes an issue with megaflows, since a batch of unique flows may come in that generate a single new datapath megaflow that covers them. Since userspace doesn't know whether the datapath supports megaflows, each unique flow will get a separate flow entry (which overlap when masks are applied) and all except the first will get rejected by a megaflow- supporting datapath as duplicates. Signed-off-by: Justin Pettit --- lib/dpif.c |7 ++- ofproto/ofproto-dpif.c | 24 ++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/lib/dpif.c b/lib/dpif.c index 4193dcb..fa62cec 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1261,7 +1261,12 @@ log_operation(const struct dpif *dpif, const char *operation, int error) static enum vlog_level flow_message_log_level(int error) { -return error ? VLL_WARN : VLL_DBG; +/* If flows arrive in a batch, userspace may push down multiple + * unique flow definitions that overlap when wildcards are applied. + * Kernels that support flow wildcarding will reject these flows as + * duplicates (EINVAL), so lower the log level to debug for these + * types of messages. */ +return (error && error != EINVAL) ? VLL_WARN : VLL_DBG; } static bool diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 6fa7894..7ab3e7f 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -69,6 +69,7 @@ COVERAGE_DEFINE(facet_changed_rule); COVERAGE_DEFINE(facet_revalidate); COVERAGE_DEFINE(facet_unexpected); COVERAGE_DEFINE(facet_suppress); +COVERAGE_DEFINE(subfacet_install_fail); struct flow_miss; struct facet; @@ -3245,6 +3246,10 @@ struct flow_miss_op { uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */ struct xlate_out xout; bool xout_garbage; /* 'xout' needs to be uninitialized? */ + +/* If this is a "put" op, then a pointer to the subfacet that should + * be destroyed if the operation fails. */ +struct subfacet *subfacet; }; /* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each @@ -3307,6 +3312,7 @@ init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet, eth_pop_vlan(packet); } +op->subfacet = NULL; op->xout_garbage = false; op->dpif_op.type = DPIF_OP_EXECUTE; op->dpif_op.u.execute.key = miss->key; @@ -3461,6 +3467,7 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet, op->xout_garbage = false; op->dpif_op.type = DPIF_OP_FLOW_PUT; +op->subfacet = subfacet; put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; put->key = miss->key; put->key_len = miss->key_len; @@ -3771,8 +3778,19 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls, } dpif_operate(backer->dpif, dpif_ops, n_ops); -/* Free memory. */ for (i = 0; i < n_ops; i++) { +/* Destroy subfacets for flows that weren't installed. */ +if (dpif_ops[i]->error != 0 +&& flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT +&& flow_miss_ops[i].subfacet) { +struct subfacet *subfacet = flow_miss_ops[i].subfacet; + +COVERAGE_INC(subfacet_install_fail); + +subfacet->path = SF_NOT_INSTALLED; +} + +/* Free memory. */ if (flow_miss_ops[i].xout_garbage) { xlate_out_uninit(&flow_miss_ops[i].xout); } @@ -5037,7 +5055,9 @@ subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions, subfacet_reset_dp_stats(subfacet, stats); } -if (!ret) { +if (ret) { +COVERAGE_INC(subfacet_install_fail); +} else { subfacet->path = path; } return ret; -- 1.7.5.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
Add a new function for converting a mask into a set of OVS_KEY_ATTR* attributes. This will be useful in a future commit. Signed-off-by: Justin Pettit --- lib/odp-util.c | 139 --- lib/odp-util.h |2 + 2 files changed, 93 insertions(+), 48 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index d70cf17..7bffa2b 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2249,45 +2249,44 @@ ovs_to_odp_frag(uint8_t nw_frag) : OVS_FRAG_TYPE_LATER); } -/* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. - * 'flow->in_port' is ignored (since it is likely to be an OpenFlow port - * number rather than a datapath port number). Instead, if 'odp_in_port' - * is anything other than ODPP_NONE, it is included in 'buf' as the input - * port. - * - * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be - * capable of being expanded to allow for that much space. */ -void -odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, - odp_port_t odp_in_port) +static void +odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, + const struct flow *flow, odp_port_t odp_in_port) { +bool is_mask; struct ovs_key_ethernet *eth_key; size_t encap; +/* We assume that if 'data' and 'flow' are not the same, we should + * treat 'data' as a mask. */ +is_mask = (data != flow); + if (flow->skb_priority) { -nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, flow->skb_priority); +nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); } if (flow->tunnel.ip_dst) { -tun_key_to_attr(buf, &flow->tunnel); +tun_key_to_attr(buf, &data->tunnel); } if (flow->skb_mark) { -nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, flow->skb_mark); +nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->skb_mark); } -if (odp_in_port != ODPP_NONE) { +/* Add an ingress port attribute if this is a mask or 'odp_in_port' + * is not the magical value "ODPP_NONE". */ +if (is_mask || odp_in_port != ODPP_NONE) { nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, sizeof *eth_key); -memcpy(eth_key->eth_src, flow->dl_src, ETH_ADDR_LEN); -memcpy(eth_key->eth_dst, flow->dl_dst, ETH_ADDR_LEN); +memcpy(eth_key->eth_src, data->dl_src, ETH_ADDR_LEN); +memcpy(eth_key->eth_dst, data->dl_dst, ETH_ADDR_LEN); if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_TYPE_VLAN)); -nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, flow->vlan_tci); +nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlan_tci); encap = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP); if (flow->vlan_tci == htons(0)) { goto unencap; @@ -2297,33 +2296,47 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, } if (ntohs(flow->dl_type) < ETH_TYPE_MIN) { +/* For backwards compatibility with kernels that don't support + * wildcarding, the following convention is used to encode the + * OVS_KEY_ATTR_ETHERTYPE for key and mask: + * + * key maskmatches + * --- + * >0x5ff 0x Specified Ethernet II Ethertype. + * >0x5ff 0 Any Ethernet II or non-Ethernet II frame. + * 0x Any non-Ethernet II frame (except valid + *802.3 SNAP packet with valid eth_type). + */ +if (is_mask) { +nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); +} goto unencap; } -nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, flow->dl_type); +nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); if (flow->dl_type == htons(ETH_TYPE_IP)) { struct ovs_key_ipv4 *ipv4_key; ipv4_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_IPV4, sizeof *ipv4_key); -ipv4_key->ipv4_src = flow->nw_src; -ipv4_key->ipv4_dst = flow->nw_dst; -ipv4_key->ipv4_proto = flow->nw_proto; -ipv4_key->ipv4_tos = flow->nw_tos; -ipv4_key->ipv4_ttl = flow->nw_ttl; -ipv4_key->ipv4_frag = ovs_to_odp_frag(flow->nw_frag); +ipv4_key->ipv4_src = data->nw_src; +ipv4_key->ipv4_dst = data->nw_dst; +ipv4_key->ipv4_proto = data->nw_proto; +ipv4_key->ipv4_tos = data->nw_tos; +ipv4_key->ipv4_ttl = data->nw_ttl; +ipv4_key->ipv4_frag = ovs_to_odp_frag(data->nw_frag); } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { struct ovs_key_ipv6 *ipv6_key; ipv6_key = nl_msg_put_unspec_uninit(buf, OVS_K
[ovs-dev] [PATCH 3/4] dpif: Log flow masks for "put" and "dump_next".
When debugging the system, it's useful to not just see the key but also the mask. Signed-off-by: Justin Pettit --- lib/dpif.c | 15 +-- ofproto/ofproto-dpif.c |6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/dpif.c b/lib/dpif.c index fa62cec..c5bf746 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -81,6 +81,7 @@ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); static void log_flow_message(const struct dpif *dpif, int error, const char *operation, const struct nlattr *key, size_t key_len, + const struct nlattr *mask, size_t mask_len, const struct dpif_flow_stats *stats, const struct nlattr *actions, size_t actions_len); static void log_operation(const struct dpif *, const char *operation, @@ -808,8 +809,8 @@ dpif_flow_get(const struct dpif *dpif, actions = NULL; actions_len = 0; } -log_flow_message(dpif, error, "flow_get", key, key_len, stats, - actions, actions_len); +log_flow_message(dpif, error, "flow_get", key, key_len, + NULL, 0, stats, actions, actions_len); } return error; } @@ -982,6 +983,7 @@ dpif_flow_dump_next(struct dpif_flow_dump *dump, } else if (should_log_flow_message(error)) { log_flow_message(dpif, error, "flow_dump", key ? *key : NULL, key ? *key_len : 0, + mask ? *mask : NULL, mask ? *mask_len : 0, stats ? *stats : NULL, actions ? *actions : NULL, actions ? *actions_len : 0); } @@ -1279,6 +1281,7 @@ should_log_flow_message(int error) static void log_flow_message(const struct dpif *dpif, int error, const char *operation, const struct nlattr *key, size_t key_len, + const struct nlattr *mask, size_t mask_len, const struct dpif_flow_stats *stats, const struct nlattr *actions, size_t actions_len) { @@ -1291,7 +1294,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation, if (error) { ds_put_format(&ds, "(%s) ", strerror(error)); } -odp_flow_key_format(key, key_len, &ds); +odp_flow_format(key, key_len, mask, mask_len, &ds); if (stats) { ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, &ds); @@ -1323,8 +1326,8 @@ log_flow_put_message(struct dpif *dpif, const struct dpif_flow_put *put, ds_put_cstr(&s, "[zero]"); } log_flow_message(dpif, error, ds_cstr(&s), - put->key, put->key_len, put->stats, - put->actions, put->actions_len); + put->key, put->key_len, put->mask, put->mask_len, + put->stats, put->actions, put->actions_len); ds_destroy(&s); } } @@ -1335,7 +1338,7 @@ log_flow_del_message(struct dpif *dpif, const struct dpif_flow_del *del, { if (should_log_flow_message(error)) { log_flow_message(dpif, error, "flow_del", del->key, del->key_len, - !error ? del->stats : NULL, NULL, 0); + NULL, 0, !error ? del->stats : NULL, NULL, 0); } } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 7ab3e7f..8dbda93 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -4137,12 +4137,12 @@ update_stats(struct dpif_backer *backer) { const struct dpif_flow_stats *stats; struct dpif_flow_dump dump; -const struct nlattr *key; -size_t key_len; +const struct nlattr *key, *mask; +size_t key_len, mask_len; dpif_flow_dump_start(&dump, backer->dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, - NULL, NULL, NULL, NULL, &stats)) { + &mask, &mask_len, NULL, NULL, &stats)) { struct subfacet *subfacet; uint32_t key_hash; -- 1.7.5.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 4/4] ofproto-dpif: Use megaflows.
The commit configures the masks generated from megaflows and pushes them through the dpif layer. With this commit and a wildcard supporting OVS kernel module, ovs-vswitchd's flow setup rate is very close to that of the Linux bridge. Signed-off-by: Justin Pettit --- NEWS |4 PORTING| 32 ofproto/ofproto-dpif.c | 23 +++ 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/NEWS b/NEWS index aae3378..7b43447 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,10 @@ post-v1.11.0 v1.11.0 - xx xxx - +- Support for megaflows, which allows wildcarding in the kernel (and + any dpif implementation that supports wildcards). Depending on + the flow table and switch configuration, flow set up rates are + close to the Linux bridge. - The "tutorial" directory contains a new tutorial for some advanced Open vSwitch features. - Stable bond mode has been removed. diff --git a/PORTING b/PORTING index ffde296..53c19d3 100644 --- a/PORTING +++ b/PORTING @@ -148,9 +148,9 @@ depends on a few different factors: * A dpif provider is usually easier to implement, but most appropriate for software switching. It "explodes" wildcard - rules into exact-match entries. This allows fast hash lookups - in software, but makes inefficient use of TCAMs in hardware - that support wildcarding. + rules into exact-match entries (with an optional wildcard mask). + This allows fast hash lookups in software, but makes + inefficient use of TCAMs in hardware that support wildcarding. The following sections describe how to implement each kind of port. @@ -175,18 +175,26 @@ Writing a dpif Provider Open vSwitch has a built-in ofproto provider named "ofproto-dpif", which is built on top of a library for manipulating datapaths, called -"dpif". A "datapath" is a simple flow table, one that supports only -exact-match flows, that is, flows without wildcards. When a packet -arrives on a network device, the datapath looks for it in this -exact-match table. If there is a match, then it performs the -associated actions. If there is no match, the datapath passes the -packet up to ofproto-dpif, which maintains an OpenFlow flow table -(that supports wildcards). If the packet matches in this flow table, -then ofproto-dpif executes its actions and inserts a new exact-match -entry into the dpif flow table. (Otherwise, ofproto-dpif passes the +"dpif". A "datapath" is a simple flow table, one that is only required +to support exact-match flows, that is, flows without wildcards. When a +packet arrives on a network device, the datapath looks for it in this +table. If there is a match, then it performs the associated actions. +If there is no match, the datapath passes the packet up to ofproto-dpif, +which maintains the full OpenFlow flow table. If the packet matches in +this flow table, then ofproto-dpif executes its actions and inserts a +new entry into the dpif flow table. (Otherwise, ofproto-dpif passes the packet up to ofproto to send the packet to the OpenFlow controller, if one is configured.) +When calculating the dpif flow, ofproto-dpif generates an exact-match +flow that describes the missed packet. It makes an effort to figure out +what fields can be wildcarded based on the switch's configuration and +OpenFlow flow table. The dpif is free to ignore the suggested wildcards +and only support the exact-match entry. However, if the dpif supports +wildcarding, then it can use the masks to match multiple flows with +fewer entries and potentially significantly reduce the number of flow +misses handled by ofproto-dpif. + The "dpif" library in turn delegates much of its functionality to a "dpif provider". The following diagram shows how dpif providers fit into the Open vSwitch architecture: diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 8dbda93..9eed040 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3247,6 +3247,9 @@ struct flow_miss_op { struct xlate_out xout; bool xout_garbage; /* 'xout' needs to be uninitialized? */ +struct ofpbuf mask; /* Flow mask for "put" ops. */ +struct odputil_keybuf maskbuf; + /* If this is a "put" op, then a pointer to the subfacet that should * be destroyed if the operation fails. */ struct subfacet *subfacet; @@ -3318,6 +3321,7 @@ init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet, op->dpif_op.u.execute.key = miss->key; op->dpif_op.u.execute.key_len = miss->key_len; op->dpif_op.u.execute.packet = packet; +ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf); } /* Helper for handle_flow_miss_without_facet() and @@ -3465,14 +3469,19 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet, subfacet->path = want_path; +
Re: [ovs-dev] [PATCH] test-classifier.c: Use UINT16_MAX instead of OFPP_NONE in mask assignment
On Thu, Jun 20, 2013 at 03:10:04PM -0700, Alex Wang wrote: > It is more comprehensible to use UINT16_MAX in mask assignment than OFPP_NONE. > > Signed-off-by: Alex Wang Applied, thanks. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] FAQ: Describe how to use ovs-ofctl with OpenFlow 1.1+.
Looks good. Acked-by: Justin Pettit --Justin On Jun 20, 2013, at 3:16 PM, Ben Pfaff wrote: > Signed-off-by: Ben Pfaff > --- > FAQ |5 + > 1 files changed, 5 insertions(+), 0 deletions(-) > > diff --git a/FAQ b/FAQ > index 1f0f841..7df4e90 100644 > --- a/FAQ > +++ b/FAQ > @@ -940,6 +940,11 @@ A: Open vSwitch 1.9 and earlier support only OpenFlow > 1.0 (plus > >ovs-vsctl set bridge br0 protocols=OpenFlow10,OpenFlow12,OpenFlow13 > > + Use the -O option to enable support for later versions of OpenFlow > + in ovs-ofctl. For example: > + > + ovs-ofctl -O OpenFlow13 dump-flows br0 > + >Support for OpenFlow 1.1 is incomplete enough that it cannot yet be >enabled, even experimentally. > > -- > 1.7.2.5 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] tests: Check that parent process ID changes
On Thu, Jun 20, 2013 at 10:31:52PM +0100, James Page wrote: > On Ubuntu Saucy based desktops, upstart runs with user sessions > enabled which means that the init process under which a daemon > might run is not always pid = 1. > > Instead of checking for pid = 1, check to ensure that the parent > pid of the monitor is not the pid of the shell that started it. > > Signed-off-by: James Page I updated the subject line to be a better summary: "tests: Tolerate init process pid != 1." and applied this to master, branch-1.{11,10,9}. Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] FAQ: Describe how to use ovs-ofctl with OpenFlow 1.1+.
Applied to master, thanks. Oops, I forgot to add the ack. Sorry about that, I'll try harder in the future. On Thu, Jun 20, 2013 at 03:43:57PM -0700, Justin Pettit wrote: > Looks good. > > Acked-by: Justin Pettit > > --Justin > > > On Jun 20, 2013, at 3:16 PM, Ben Pfaff wrote: > > > Signed-off-by: Ben Pfaff > > --- > > FAQ |5 + > > 1 files changed, 5 insertions(+), 0 deletions(-) > > > > diff --git a/FAQ b/FAQ > > index 1f0f841..7df4e90 100644 > > --- a/FAQ > > +++ b/FAQ > > @@ -940,6 +940,11 @@ A: Open vSwitch 1.9 and earlier support only OpenFlow > > 1.0 (plus > > > >ovs-vsctl set bridge br0 protocols=OpenFlow10,OpenFlow12,OpenFlow13 > > > > + Use the -O option to enable support for later versions of OpenFlow > > + in ovs-ofctl. For example: > > + > > + ovs-ofctl -O OpenFlow13 dump-flows br0 > > + > >Support for OpenFlow 1.1 is incomplete enough that it cannot yet be > >enabled, even experimentally. > > > > -- > > 1.7.2.5 > > > > ___ > > dev mailing list > > dev@openvswitch.org > > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v5] gre: Restructure tunneling.
On Wed, Jun 12, 2013 at 5:46 PM, Pravin B Shelar wrote: > diff --git a/datapath/linux/compat/gre.c b/datapath/linux/compat/gre.c > new file mode 100644 > index 000..fbb9fb9 > --- /dev/null > +++ b/datapath/linux/compat/gre.c > +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) > +{ > + int err; > + > + skb_reset_inner_headers(skb); > + > + if (skb_is_gso(skb)) { > + if (gre_csum) > + OVS_GSO_CB(skb)->fix_segment = gre_csum_fix; > + } else { > + if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { > + err = skb_checksum_help(skb); > + if (err) > + goto error; > + > + } else if (skb->ip_summed != CHECKSUM_PARTIAL) > + skb->ip_summed = CHECKSUM_NONE; > + } > + return skb; > +error: > + kfree_skb(skb); > + return ERR_PTR(err); > +} [...] > +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, > + int hdr_len) > +{ > + struct gre_base_hdr *greh; > + > + __skb_push(skb, hdr_len); > + > + greh = (struct gre_base_hdr *)skb->data; > + greh->flags = tnl_flags_to_gre_flags(tpi->flags); > + greh->protocol = tpi->proto; > + > + if (tpi->flags & (TUNNEL_KEY | TUNNEL_CSUM | TUNNEL_SEQ)) { > + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); > + > + if (tpi->flags & TUNNEL_SEQ) { > + *ptr = tpi->seq; > + ptr--; > + } > + if (tpi->flags & TUNNEL_KEY) { > + *ptr = tpi->key; > + ptr--; > + } > + if (tpi->flags & TUNNEL_CSUM && !is_gre_gso(skb)) { > + *ptr = 0; > + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, > + skb->len, 0)); > + } > + } > +} Are these two checksum calls safe in the presence of shared frags? > diff --git a/datapath/linux/compat/gso.c b/datapath/linux/compat/gso.c > new file mode 100644 > index 000..8cb2e06 > --- /dev/null > +++ b/datapath/linux/compat/gso.c > +int rpl_ip_local_out(struct sk_buff *skb) > +{ [...] > + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { > + int err; > + > + /* Use default features for dst device. */ > + if (unlikely(skb_needs_linearize(skb, dst->dev->features))) { > + err = __skb_linearize(skb); > + if (unlikely(err)) > + return 0; > + } > + > + err = skb_checksum_help(skb); > + if (unlikely(err)) > + return 0; If we have the new version of skb_checksum_help (the one that checks if a frag is shared) then I don't think we need to force linearization and if we have the old one then I think we always need to linearize regardless of the device features. > + id = -1; > + } > + > + while (skb) { > + struct sk_buff *next_skb = skb->next; > + struct iphdr *iph; > + int err; > + > + if (next_skb) > + dst_clone(dst); Doesn't skb_gso_segment() increment the refcount when it adds the dst to each new segment? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
On Thu, Jun 20, 2013 at 03:16:42PM -0700, Justin Pettit wrote: > Add a new function for converting a mask into a set of > OVS_KEY_ATTR* attributes. This will be useful in a future commit. > > Signed-off-by: Justin Pettit "sparse" says: ../lib/odp-util.c:2427:47: warning: incorrect type in argument 4 (different base types) ../lib/odp-util.c:2427:47:expected restricted odp_port_t [usertype] odp_in_port ../lib/odp-util.c:2427:47:got unsigned int [unsigned] [usertype] odp_in_port ../lib/odp-util.c:2424:1: error: symbol 'odp_flow_key_from_flow' redeclared with different type (originally declared at ../lib/odp-util.h:103) - incompatible argument 3 (different base types) ../lib/odp-util.c:2439:1: error: symbol 'odp_flow_key_from_mask' redeclared with different type (originally declared at ../lib/odp-util.h:105) - incompatible argument 4 (different base types) Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
On Thu, Jun 20, 2013 at 03:16:43PM -0700, Justin Pettit wrote: > If a flow cannot be installed in the datapath, we should notice > this and not treat it as installed. This becomes an issue with > megaflows, since a batch of unique flows may come in that generate > a single new datapath megaflow that covers them. Since userspace > doesn't know whether the datapath supports megaflows, each unique > flow will get a separate flow entry (which overlap when masks are > applied) and all except the first will get rejected by a megaflow- > supporting datapath as duplicates. > > Signed-off-by: Justin Pettit The following is a little worrisome because EINVAL is very generic and can indicate many kinds of errors. If we always suppress logging EINVAL, then we could miss important problems. I am surprised that we do not use EEXIST or another error that is less overloaded: > +/* If flows arrive in a batch, userspace may push down multiple > + * unique flow definitions that overlap when wildcards are applied. > + * Kernels that support flow wildcarding will reject these flows as > + * duplicates (EINVAL), so lower the log level to debug for these > + * types of messages. */ > +return (error && error != EINVAL) ? VLL_WARN : VLL_DBG; ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/2] ovsdb-server: Add and remove databases during run time.
The commit allows a user to add a database file to a ovsdb-server during run time. One can also remove a database file from ovsdb-server's control. Feature #14595. Signed-off-by: Gurucharan Shetty --- ovsdb/jsonrpc-server.c | 10 ++ ovsdb/jsonrpc-server.h |2 + ovsdb/ovsdb-server.1.in | 18 +++ ovsdb/ovsdb-server.c| 348 +++ ovsdb/ovsdb.c |3 + ovsdb/server.c |9 ++ ovsdb/server.h |1 + tests/ovsdb-server.at | 136 ++ 8 files changed, 470 insertions(+), 57 deletions(-) diff --git a/ovsdb/jsonrpc-server.c b/ovsdb/jsonrpc-server.c index 9f99d64..0745ae6 100644 --- a/ovsdb/jsonrpc-server.c +++ b/ovsdb/jsonrpc-server.c @@ -132,6 +132,16 @@ ovsdb_jsonrpc_server_add_db(struct ovsdb_jsonrpc_server *svr, struct ovsdb *db) return ovsdb_server_add_db(&svr->up, db); } +/* Removes 'db' from the set of databases served out by 'svr'. Returns + * 'db' if successful, NULL if there is no database associated with 'db'. */ +void * +ovsdb_jsonrpc_server_remove_db(struct ovsdb_jsonrpc_server *svr, +struct ovsdb *db) +{ +return ovsdb_server_remove_db(&svr->up, db); +} + + void ovsdb_jsonrpc_server_destroy(struct ovsdb_jsonrpc_server *svr) { diff --git a/ovsdb/jsonrpc-server.h b/ovsdb/jsonrpc-server.h index f2395fc..707aa18 100644 --- a/ovsdb/jsonrpc-server.h +++ b/ovsdb/jsonrpc-server.h @@ -26,6 +26,8 @@ struct simap; struct ovsdb_jsonrpc_server *ovsdb_jsonrpc_server_create(void); bool ovsdb_jsonrpc_server_add_db(struct ovsdb_jsonrpc_server *, struct ovsdb *); +void *ovsdb_jsonrpc_server_remove_db(struct ovsdb_jsonrpc_server *, + struct ovsdb *); void ovsdb_jsonrpc_server_destroy(struct ovsdb_jsonrpc_server *); /* Options for a remote. */ diff --git a/ovsdb/ovsdb-server.1.in b/ovsdb/ovsdb-server.1.in index 82dd9c6..2e62ec9 100644 --- a/ovsdb/ovsdb-server.1.in +++ b/ovsdb/ovsdb-server.1.in @@ -152,6 +152,24 @@ not list remotes added indirectly because they were read from the database by configuring a \fBdb:\fR[\fIdb\fB,\fR]\fItable\fB,\fIcolumn\fR remote. . +.IP "\fBovsdb\-server/add\-db \fIdatabase\fR" +Adds the \fIdatabase\fR to the running \fBovsdb\-server\fR. The database +file must already have been created and initialized using, for example, +\fBovsdb\-tool create\fR. +. +.IP "\fBovsdb\-server/remove\-db \fIdatabase\fR" +Removes the specified \fIdatabase\fR from the running \fBovsdb\-server\fR. +If a \fIremote\fR has been configured such that it points to the specified +\fIdatabase\fR, then it is automatically removed. Any public key +infrastructure options specified through this database is no longer checked +for new changes, but the linked files are still used. (Adding the +\fIdatabase\fR back will recheck for the public key infrastructure options +specified through it.) +. +.IP "\fBovsdb\-server/list\-dbs" +Outputs a list of the currently configured databases added either through +the command line or through the \fBovsdb\-server/add\-db\fR command. +. .so lib/vlog-unixctl.man .so lib/memory-unixctl.man .so lib/coverage-unixctl.man diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index ea8fe0f..a9c910b 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -73,26 +73,28 @@ static char *private_key_file; static char *certificate_file; static char *ca_cert_file; static bool bootstrap_ca_cert; +static bool ssl_reconfigure = true; static unixctl_cb_func ovsdb_server_exit; static unixctl_cb_func ovsdb_server_compact; static unixctl_cb_func ovsdb_server_reconnect; -struct add_remote_aux { +struct load_config_aux { struct sset *remotes; +struct sset *dbnames; struct shash *all_dbs; FILE *config_tmpfile; +struct ovsdb_jsonrpc_server *jsonrpc; }; static unixctl_cb_func ovsdb_server_add_remote; - -struct remove_remote_aux { -struct sset *remotes; -FILE *config_tmpfile; -}; static unixctl_cb_func ovsdb_server_remove_remote; static unixctl_cb_func ovsdb_server_list_remotes; -static void open_db(const struct ovsdb_jsonrpc_server *jsonrpc, +static unixctl_cb_func ovsdb_server_add_database; +static unixctl_cb_func ovsdb_server_remove_database; +static unixctl_cb_func ovsdb_server_list_databases; + +static void open_db(struct ovsdb_jsonrpc_server *jsonrpc, struct db *db, struct shash *all_dbs); static void parse_options(int *argc, char **argvp[], @@ -108,8 +110,8 @@ static void update_remote_status(const struct ovsdb_jsonrpc_server *jsonrpc, const struct sset *remotes, struct shash *all_dbs); -static void save_config(FILE *config_file, const struct sset *); -static void load_config(FILE *config_file, struct sset *); +static void save_config(FILE *config_file, const struct load_config_aux *); +static void load_config(FILE *config_fi
[ovs-dev] [PATCH 1/2] ovsdb-server: Store databases in shash instead of array.
An upcoming commit provides the ability to add and remove databases. Having the databases in a shash instead of an array makes it easier to add and remove databases. Feature #14595. Signed-off-by: Gurucharan Shetty --- ovsdb/ovsdb-server.c | 186 +- 1 file changed, 107 insertions(+), 79 deletions(-) diff --git a/ovsdb/ovsdb-server.c b/ovsdb/ovsdb-server.c index 394f5a5..ea8fe0f 100644 --- a/ovsdb/ovsdb-server.c +++ b/ovsdb/ovsdb-server.c @@ -43,6 +43,7 @@ #include "process.h" #include "row.h" #include "simap.h" +#include "shash.h" #include "stream-ssl.h" #include "stream.h" #include "stress.h" @@ -79,8 +80,7 @@ static unixctl_cb_func ovsdb_server_reconnect; struct add_remote_aux { struct sset *remotes; -struct db *dbs; -size_t n_dbs; +struct shash *all_dbs; FILE *config_tmpfile; }; static unixctl_cb_func ovsdb_server_add_remote; @@ -92,18 +92,21 @@ struct remove_remote_aux { static unixctl_cb_func ovsdb_server_remove_remote; static unixctl_cb_func ovsdb_server_list_remotes; +static void open_db(const struct ovsdb_jsonrpc_server *jsonrpc, +struct db *db, struct shash *all_dbs); + static void parse_options(int *argc, char **argvp[], struct sset *remotes, char **unixctl_pathp, char **run_command); static void usage(void) NO_RETURN; static void reconfigure_from_db(struct ovsdb_jsonrpc_server *jsonrpc, -const struct db dbs[], size_t n_dbs, +const struct shash *all_dbs, struct sset *remotes); static void update_remote_status(const struct ovsdb_jsonrpc_server *jsonrpc, const struct sset *remotes, - struct db dbs[], size_t n_dbs); + struct shash *all_dbs); static void save_config(FILE *config_file, const struct sset *); static void load_config(FILE *config_file, struct sset *); @@ -124,8 +127,9 @@ main(int argc, char *argv[]) struct remove_remote_aux remove_remote_aux; FILE *config_tmpfile; -struct db *dbs; -int n_dbs; +struct db *db; +struct shash all_dbs; +struct shash_node *node; int i; proctitle_init(argc, argv); @@ -152,34 +156,22 @@ main(int argc, char *argv[]) /* Load the saved config. */ load_config(config_tmpfile, &remotes); -n_dbs = MAX(1, argc); -dbs = xcalloc(n_dbs + 1, sizeof *dbs); +shash_init(&all_dbs); +jsonrpc = ovsdb_jsonrpc_server_create(); + if (argc > 0) { for (i = 0; i < argc; i++) { -dbs[i].filename = argv[i]; -} +db = xzalloc(sizeof *db); +db->filename = argv[i]; +open_db(jsonrpc, db, &all_dbs); + } } else { -dbs[0].filename = xasprintf("%s/conf.db", ovs_dbdir()); -} - -for (i = 0; i < n_dbs; i++) { -struct ovsdb_error *error; - -error = ovsdb_file_open(dbs[i].filename, false, -&dbs[i].db, &dbs[i].file); -if (error) { -ovs_fatal(0, "%s", ovsdb_error_to_string(error)); -} +db = xzalloc(sizeof *db); +db->filename = xasprintf("%s/conf.db", ovs_dbdir()); +open_db(jsonrpc, db, &all_dbs); } -jsonrpc = ovsdb_jsonrpc_server_create(); -for (i = 0; i < n_dbs; i++) { -if (!ovsdb_jsonrpc_server_add_db(jsonrpc, dbs[i].db)) { -ovs_fatal(0, "%s: duplicate database name", - dbs[i].db->schema->name); -} -} -reconfigure_from_db(jsonrpc, dbs, n_dbs, &remotes); +reconfigure_from_db(jsonrpc, &all_dbs, &remotes); retval = unixctl_server_create(unixctl_path, &unixctl); if (retval) { @@ -213,13 +205,12 @@ main(int argc, char *argv[]) unixctl_command_register("exit", "", 0, 0, ovsdb_server_exit, &exiting); unixctl_command_register("ovsdb-server/compact", "", 0, 1, - ovsdb_server_compact, dbs); + ovsdb_server_compact, &all_dbs); unixctl_command_register("ovsdb-server/reconnect", "", 0, 0, ovsdb_server_reconnect, jsonrpc); add_remote_aux.remotes = &remotes; -add_remote_aux.dbs = dbs; -add_remote_aux.n_dbs = n_dbs; +add_remote_aux.all_dbs = &all_dbs; add_remote_aux.config_tmpfile = config_tmpfile; unixctl_command_register("ovsdb-server/add-remote", "REMOTE", 1, 1, ovsdb_server_add_remote, &add_remote_aux); @@ -234,16 +225,15 @@ main(int argc, char *argv[]) exiting = false; while (!exiting) { -int i; - memory_run(); if (memory_should_report()) { struct simap usage; simap_init(&usage); ovsdb_jsonrpc_server_get_memory_usage(jsonrpc, &usage); -for (i = 0; i
Re: [ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
On Jun 20, 2013, at 4:06 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 03:16:43PM -0700, Justin Pettit wrote: >> If a flow cannot be installed in the datapath, we should notice >> this and not treat it as installed. This becomes an issue with >> megaflows, since a batch of unique flows may come in that generate >> a single new datapath megaflow that covers them. Since userspace >> doesn't know whether the datapath supports megaflows, each unique >> flow will get a separate flow entry (which overlap when masks are >> applied) and all except the first will get rejected by a megaflow- >> supporting datapath as duplicates. >> >> Signed-off-by: Justin Pettit > > The following is a little worrisome because EINVAL is very generic and > can indicate many kinds of errors. If we always suppress logging > EINVAL, then we could miss important problems. I am surprised that we > do not use EEXIST or another error that is less overloaded: Agreed. Andy's going to change the kernel to return EEXIST instead. Are you okay with the patch with EINVAL references replaced with EEXIST? --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 3/4] dpif: Log flow masks for "put" and "dump_next".
On Thu, Jun 20, 2013 at 03:16:44PM -0700, Justin Pettit wrote: > When debugging the system, it's useful to not just see the key but > also the mask. > > Signed-off-by: Justin Pettit Acked-by: Ben Pfaff ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
On Thu, Jun 20, 2013 at 04:20:16PM -0700, Justin Pettit wrote: > On Jun 20, 2013, at 4:06 PM, Ben Pfaff wrote: > > On Thu, Jun 20, 2013 at 03:16:43PM -0700, Justin Pettit wrote: > > The following is a little worrisome because EINVAL is very generic and > > can indicate many kinds of errors. If we always suppress logging > > EINVAL, then we could miss important problems. I am surprised that we > > do not use EEXIST or another error that is less overloaded: > > Agreed. Andy's going to change the kernel to return EEXIST instead. > Are you okay with the patch with EINVAL references replaced with > EEXIST? The other thing that worries me is that you mentioned that you saw multiple installations of a single subfacet in a single batch. If so, that worries me because it seems like this could happen: handle_flow_miss_with_facet() queues up subfacet sf for installation into fast path, and sets sf->path to SF_FAST_PATH. handle_flow_miss_with_facet() queues up subfacet sf for installation into fast path, again, and sets sf->path to SF_FAST_PATH. ... The first installation succeeds. OK. The second installation fails (EINVAL, EEXIST, whatever), so we set sf->path back to SF_NOT_INSTALLED. But it was installed by the first "put". But I don't understand how we'd get multiple installations to begin with, since the first handle_flow_miss_with_facet() call would set sf->path to SF_FAST_PATH and the second one would see that sf->path was already SF_FAST_PATH and not try to install it again. Do you have any idea what was going on there? ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
On Jun 20, 2013, at 3:59 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 03:16:42PM -0700, Justin Pettit wrote: >> Add a new function for converting a mask into a set of >> OVS_KEY_ATTR* attributes. This will be useful in a future commit. >> >> Signed-off-by: Justin Pettit > > "sparse" says: > >../lib/odp-util.c:2427:47: warning: incorrect type in argument 4 > (different base types) >../lib/odp-util.c:2427:47:expected restricted odp_port_t [usertype] > odp_in_port >../lib/odp-util.c:2427:47:got unsigned int [unsigned] [usertype] > odp_in_port >../lib/odp-util.c:2424:1: error: symbol 'odp_flow_key_from_flow' > redeclared with different type (originally declared at ../lib/odp-util.h:103) > - incompatible argument 3 (different base types) >../lib/odp-util.c:2439:1: error: symbol 'odp_flow_key_from_mask' > redeclared with different type (originally declared at ../lib/odp-util.h:105) > - incompatible argument 4 (different base types) Uh, forgot to update from Alex's patches. Fixed. Thanks, --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
On Thu, Jun 20, 2013 at 04:32:39PM -0700, Justin Pettit wrote: > On Jun 20, 2013, at 3:59 PM, Ben Pfaff wrote: > > > On Thu, Jun 20, 2013 at 03:16:42PM -0700, Justin Pettit wrote: > >> Add a new function for converting a mask into a set of > >> OVS_KEY_ATTR* attributes. This will be useful in a future commit. > >> > >> Signed-off-by: Justin Pettit > > > > "sparse" says: > > > >../lib/odp-util.c:2427:47: warning: incorrect type in argument 4 > > (different base types) > >../lib/odp-util.c:2427:47:expected restricted odp_port_t [usertype] > > odp_in_port > >../lib/odp-util.c:2427:47:got unsigned int [unsigned] [usertype] > > odp_in_port > >../lib/odp-util.c:2424:1: error: symbol 'odp_flow_key_from_flow' > > redeclared with different type (originally declared at > > ../lib/odp-util.h:103) - incompatible argument 3 (different base types) > >../lib/odp-util.c:2439:1: error: symbol 'odp_flow_key_from_mask' > > redeclared with different type (originally declared at > > ../lib/odp-util.h:105) - incompatible argument 4 (different base types) > > Uh, forgot to update from Alex's patches. Fixed. Can I have the fixed version so I can try it too? Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] datapath: Return EEXIST on overlapping new flow request
Flow update still requires unmasked key to match. If not, return EINVAL. CC: Justin Pettit Signed-off-by: Andy Zhou --- datapath/datapath.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/datapath/datapath.c b/datapath/datapath.c index ee3d5e4..a514e74 100644 --- a/datapath/datapath.c +++ b/datapath/datapath.c @@ -1343,12 +1343,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) /* We found a matching flow. */ struct sw_flow_actions *old_acts; - /* Make sure the it has the same unmasked key. */ - if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) { - error = -EINVAL; - goto err_unlock_ovs; - } - /* Bail out if we're not allowed to modify an existing flow. * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL * because Generic Netlink treats the latter as a dump @@ -1360,6 +1354,11 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL)) goto err_unlock_ovs; + /* The unmasked key has to be the same for flow updates. */ + error = -EINVAL; + if (!ovs_flow_cmp_unmasked_key(flow, &key, match.range.end)) + goto err_unlock_ovs; + /* Update actions. */ old_acts = ovsl_dereference(flow->sf_acts); rcu_assign_pointer(flow->sf_acts, acts); -- 1.7.9.5 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
On Jun 20, 2013, at 4:33 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 04:32:39PM -0700, Justin Pettit wrote: >> On Jun 20, 2013, at 3:59 PM, Ben Pfaff wrote: >> >>> On Thu, Jun 20, 2013 at 03:16:42PM -0700, Justin Pettit wrote: Add a new function for converting a mask into a set of OVS_KEY_ATTR* attributes. This will be useful in a future commit. Signed-off-by: Justin Pettit >>> >>> "sparse" says: >>> >>> ../lib/odp-util.c:2427:47: warning: incorrect type in argument 4 >>> (different base types) >>> ../lib/odp-util.c:2427:47:expected restricted odp_port_t [usertype] >>> odp_in_port >>> ../lib/odp-util.c:2427:47:got unsigned int [unsigned] [usertype] >>> odp_in_port >>> ../lib/odp-util.c:2424:1: error: symbol 'odp_flow_key_from_flow' >>> redeclared with different type (originally declared at >>> ../lib/odp-util.h:103) - incompatible argument 3 (different base types) >>> ../lib/odp-util.c:2439:1: error: symbol 'odp_flow_key_from_mask' >>> redeclared with different type (originally declared at >>> ../lib/odp-util.h:105) - incompatible argument 4 (different base types) >> >> Uh, forgot to update from Alex's patches. Fixed. > > Can I have the fixed version so I can try it too? I'll respin the whole series once I get feedback on part 4. --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/4] ofproto-dpif: Use megaflows.
On Thu, Jun 20, 2013 at 03:16:45PM -0700, Justin Pettit wrote: > The commit configures the masks generated from megaflows and pushes > them through the dpif layer. > > With this commit and a wildcard supporting OVS kernel module, > ovs-vswitchd's flow setup rate is very close to that of the Linux > bridge. > > Signed-off-by: Justin Pettit "sparse" says: ../ofproto/ofproto-dpif.c:5063:42: warning: incorrect type in argument 4 (different base types) ../ofproto/ofproto-dpif.c:5063:42:expected restricted odp_port_t [usertype] odp_in_port ../ofproto/ofproto-dpif.c:5063:42:got unsigned int ../ofproto/ofproto-dpif.c:3474:45: warning: incorrect type in argument 4 (different base types) ../ofproto/ofproto-dpif.c:3474:45:expected restricted odp_port_t [usertype] odp_in_port ../ofproto/ofproto-dpif.c:3474:45:got unsigned int I don't think init_flow_miss_execute_op() needs to initialize op->mask (though it doesn't hurt). Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
On Thu, Jun 20, 2013 at 04:35:46PM -0700, Justin Pettit wrote: > On Jun 20, 2013, at 4:33 PM, Ben Pfaff wrote: > > > On Thu, Jun 20, 2013 at 04:32:39PM -0700, Justin Pettit wrote: > >> On Jun 20, 2013, at 3:59 PM, Ben Pfaff wrote: > >> > >>> On Thu, Jun 20, 2013 at 03:16:42PM -0700, Justin Pettit wrote: > Add a new function for converting a mask into a set of > OVS_KEY_ATTR* attributes. This will be useful in a future commit. > > Signed-off-by: Justin Pettit > >>> > >>> "sparse" says: > >>> > >>> ../lib/odp-util.c:2427:47: warning: incorrect type in argument 4 > >>> (different base types) > >>> ../lib/odp-util.c:2427:47:expected restricted odp_port_t [usertype] > >>> odp_in_port > >>> ../lib/odp-util.c:2427:47:got unsigned int [unsigned] [usertype] > >>> odp_in_port > >>> ../lib/odp-util.c:2424:1: error: symbol 'odp_flow_key_from_flow' > >>> redeclared with different type (originally declared at > >>> ../lib/odp-util.h:103) - incompatible argument 3 (different base types) > >>> ../lib/odp-util.c:2439:1: error: symbol 'odp_flow_key_from_mask' > >>> redeclared with different type (originally declared at > >>> ../lib/odp-util.h:105) - incompatible argument 4 (different base types) > >> > >> Uh, forgot to update from Alex's patches. Fixed. > > > > Can I have the fixed version so I can try it too? > > I'll respin the whole series once I get feedback on part 4. Thanks! ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] datapath: Return EEXIST on overlapping new flow request
On Thu, Jun 20, 2013 at 4:36 PM, Andy Zhou wrote: > Flow update still requires unmasked key to match. If not, > return EINVAL. > > CC: Justin Pettit > Signed-off-by: Andy Zhou Applied, thanks. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 02/11] ovs-thread: Add per-thread data support.
On 20 June 2013 16:18, Ben Pfaff wrote: > On Wed, Jun 19, 2013 at 01:17:03PM -0700, Ben Pfaff wrote: >> POSIX defines a portable pthread_key_t API for per-thread data. GCC and >> C11 have two different forms of per-thread data that are generally faster >> than the POSIX API, where they are available. This commit adds a >> macro-based wrapper, DEFINE_PER_THREAD_DATA, that takes advantage of the >> GCC extension where it is available and falls back to the POSIX API >> otherwise. (I'm not aware of any compilers that implement the C11 feature, >> so this commit doesn't try to use it.) > > Ed Maste pointed out off-list that clang on FreeBSD supports > _Thread_local. Here's a revised version of the patch that supports > both _Thread_local and __thread. I've also updated the "reviews" > branch. I changed the autoconf test like so: diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 index 3895346..eb91b1a 100644 --- a/m4/openvswitch.m4 +++ b/m4/openvswitch.m4 @@ -402,7 +402,10 @@ AC_DEFUN([OVS_CHECK__THREAD_LOCAL], [whether $CC supports _Thread_local], [ovs_cv__Thread_local], [AC_LINK_IFELSE( -[AC_LANG_PROGRAM([static _Thread_local var;], [return var;])], +[AC_LANG_PROGRAM( + [#include + static _Thread_local int var;], + [return var;])], [ovs_cv__Thread_local=yes], [ovs_cv__Thread_local=no])]) if test $ovs_cv__Thread_local = no; then This version builds and passes unit tests for me on FreeBSD 9.x, with both GCC and Clang. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [RFC] Making OVS less Ethernet specific
On Mon, Jun 17, 2013 at 11:19 AM, Lori Jakab wrote: > On 6/11/13 1:09 AM, Jesse Gross wrote: >> >> On Wed, Jun 5, 2013 at 2:56 PM, Lori Jakab wrote: >>> >>> At a high level, we would introduce layer 3 (tunnel) vports, and LISP >>> would be such a vport. Whenever a packet that ingressed on a L2 vport >>> needs to egress on a L3 vport, we apply the internal pop_eth action >>> automatically. For packets going from L3 vports to L2 vports, a >>> push_eth action would add a MAC header, with addresses determined by ARP >>> resolution in user space. >> >> >> One other thing that you might want to keep in the back of your mind >> as you implement this is how it might interact with MAC-in-MAC >> encapsulation. OVS doesn't currently support this but if it did (and >> there have been requests), it would presumably want to use these >> actions as well. > > > Thanks for pointing this out, will keep it in mind. > > I gather that these MAC header actions should then be VLAN tag aware, and be > easily extended in the future to recognize 802.1ah service encapsulation? It depends on your definition of aware, I guess. What behavior were you thinking of? It may actually be better to make the kernel fairly dumb and just push headers on as it gets them. That would actually give userspace more flexibility to reorder and change things than if the kernel tries to arrange them properly. >> >>> One such decision is how to handle the flow key. I set all fields in >>> key->eth to 0, except the type, because we still need to know what kind >>> of L3 packet do we have. Since a lot of code is accessing >>> key->eth.type, this is easier than having this information in a >>> different place, although it would be more elegant to set this field to >>> 0 as well. >> >> >> Can we use skb->protocol for the cases where we currently access the >> EtherType? Are there cases that you are particularly concerned about? > > > The only case I'm concerned about is in the action validation code, > particularly the validate_set() and validate_tp_port() functions, since they > don't have access to the skb, and need to know the layer 3 protocol of the > flow. In validate_set() we could relax the check for eth.type for > OVS_KEY_ATTR_IPV4 and OVS_KEY_ATTR_IPV6, but I'm not sure what to do about > validate_tp_port(). > > In general, I think it would be a good idea for the flow key to have a field > specifying the layer 3 protocol, when an ethernet header is not present. That makes sense to me. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
Add a new function for converting a mask into a set of OVS_KEY_ATTR* attributes. This will be useful in a future commit. Signed-off-by: Justin Pettit --- lib/odp-util.c | 139 --- lib/odp-util.h |2 + 2 files changed, 93 insertions(+), 48 deletions(-) diff --git a/lib/odp-util.c b/lib/odp-util.c index d70cf17..fd4207a 100644 --- a/lib/odp-util.c +++ b/lib/odp-util.c @@ -2249,45 +2249,44 @@ ovs_to_odp_frag(uint8_t nw_frag) : OVS_FRAG_TYPE_LATER); } -/* Appends a representation of 'flow' as OVS_KEY_ATTR_* attributes to 'buf'. - * 'flow->in_port' is ignored (since it is likely to be an OpenFlow port - * number rather than a datapath port number). Instead, if 'odp_in_port' - * is anything other than ODPP_NONE, it is included in 'buf' as the input - * port. - * - * 'buf' must have at least ODPUTIL_FLOW_KEY_BYTES bytes of space, or be - * capable of being expanded to allow for that much space. */ -void -odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, - odp_port_t odp_in_port) +static void +odp_flow_key_from_flow__(struct ofpbuf *buf, const struct flow *data, + const struct flow *flow, odp_port_t odp_in_port) { +bool is_mask; struct ovs_key_ethernet *eth_key; size_t encap; +/* We assume that if 'data' and 'flow' are not the same, we should + * treat 'data' as a mask. */ +is_mask = (data != flow); + if (flow->skb_priority) { -nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, flow->skb_priority); +nl_msg_put_u32(buf, OVS_KEY_ATTR_PRIORITY, data->skb_priority); } if (flow->tunnel.ip_dst) { -tun_key_to_attr(buf, &flow->tunnel); +tun_key_to_attr(buf, &data->tunnel); } if (flow->skb_mark) { -nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, flow->skb_mark); +nl_msg_put_u32(buf, OVS_KEY_ATTR_SKB_MARK, data->skb_mark); } -if (odp_in_port != ODPP_NONE) { +/* Add an ingress port attribute if this is a mask or 'odp_in_port' + * is not the magical value "ODPP_NONE". */ +if (is_mask || odp_in_port != ODPP_NONE) { nl_msg_put_odp_port(buf, OVS_KEY_ATTR_IN_PORT, odp_in_port); } eth_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_ETHERNET, sizeof *eth_key); -memcpy(eth_key->eth_src, flow->dl_src, ETH_ADDR_LEN); -memcpy(eth_key->eth_dst, flow->dl_dst, ETH_ADDR_LEN); +memcpy(eth_key->eth_src, data->dl_src, ETH_ADDR_LEN); +memcpy(eth_key->eth_dst, data->dl_dst, ETH_ADDR_LEN); if (flow->vlan_tci != htons(0) || flow->dl_type == htons(ETH_TYPE_VLAN)) { nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_TYPE_VLAN)); -nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, flow->vlan_tci); +nl_msg_put_be16(buf, OVS_KEY_ATTR_VLAN, data->vlan_tci); encap = nl_msg_start_nested(buf, OVS_KEY_ATTR_ENCAP); if (flow->vlan_tci == htons(0)) { goto unencap; @@ -2297,33 +2296,47 @@ odp_flow_key_from_flow(struct ofpbuf *buf, const struct flow *flow, } if (ntohs(flow->dl_type) < ETH_TYPE_MIN) { +/* For backwards compatibility with kernels that don't support + * wildcarding, the following convention is used to encode the + * OVS_KEY_ATTR_ETHERTYPE for key and mask: + * + * key maskmatches + * --- + * >0x5ff 0x Specified Ethernet II Ethertype. + * >0x5ff 0 Any Ethernet II or non-Ethernet II frame. + * 0x Any non-Ethernet II frame (except valid + *802.3 SNAP packet with valid eth_type). + */ +if (is_mask) { +nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); +} goto unencap; } -nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, flow->dl_type); +nl_msg_put_be16(buf, OVS_KEY_ATTR_ETHERTYPE, data->dl_type); if (flow->dl_type == htons(ETH_TYPE_IP)) { struct ovs_key_ipv4 *ipv4_key; ipv4_key = nl_msg_put_unspec_uninit(buf, OVS_KEY_ATTR_IPV4, sizeof *ipv4_key); -ipv4_key->ipv4_src = flow->nw_src; -ipv4_key->ipv4_dst = flow->nw_dst; -ipv4_key->ipv4_proto = flow->nw_proto; -ipv4_key->ipv4_tos = flow->nw_tos; -ipv4_key->ipv4_ttl = flow->nw_ttl; -ipv4_key->ipv4_frag = ovs_to_odp_frag(flow->nw_frag); +ipv4_key->ipv4_src = data->nw_src; +ipv4_key->ipv4_dst = data->nw_dst; +ipv4_key->ipv4_proto = data->nw_proto; +ipv4_key->ipv4_tos = data->nw_tos; +ipv4_key->ipv4_ttl = data->nw_ttl; +ipv4_key->ipv4_frag = ovs_to_odp_frag(data->nw_frag); } else if (flow->dl_type == htons(ETH_TYPE_IPV6)) { struct ovs_key_ipv6 *ipv6_key; ipv6_key = nl_msg_put_unspec_uninit(buf, OVS_K
[ovs-dev] [PATCH 3/4] dpif: Log flow masks for "put" and "dump_next".
When debugging the system, it's useful to not just see the key but also the mask. Signed-off-by: Justin Pettit Acked-by: Ben Pfaff --- lib/dpif.c | 15 +-- ofproto/ofproto-dpif.c |6 +++--- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/lib/dpif.c b/lib/dpif.c index bab1ffe..169be20 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -81,6 +81,7 @@ static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5); static void log_flow_message(const struct dpif *dpif, int error, const char *operation, const struct nlattr *key, size_t key_len, + const struct nlattr *mask, size_t mask_len, const struct dpif_flow_stats *stats, const struct nlattr *actions, size_t actions_len); static void log_operation(const struct dpif *, const char *operation, @@ -808,8 +809,8 @@ dpif_flow_get(const struct dpif *dpif, actions = NULL; actions_len = 0; } -log_flow_message(dpif, error, "flow_get", key, key_len, stats, - actions, actions_len); +log_flow_message(dpif, error, "flow_get", key, key_len, + NULL, 0, stats, actions, actions_len); } return error; } @@ -982,6 +983,7 @@ dpif_flow_dump_next(struct dpif_flow_dump *dump, } else if (should_log_flow_message(error)) { log_flow_message(dpif, error, "flow_dump", key ? *key : NULL, key ? *key_len : 0, + mask ? *mask : NULL, mask ? *mask_len : 0, stats ? *stats : NULL, actions ? *actions : NULL, actions ? *actions_len : 0); } @@ -1279,6 +1281,7 @@ should_log_flow_message(int error) static void log_flow_message(const struct dpif *dpif, int error, const char *operation, const struct nlattr *key, size_t key_len, + const struct nlattr *mask, size_t mask_len, const struct dpif_flow_stats *stats, const struct nlattr *actions, size_t actions_len) { @@ -1291,7 +1294,7 @@ log_flow_message(const struct dpif *dpif, int error, const char *operation, if (error) { ds_put_format(&ds, "(%s) ", strerror(error)); } -odp_flow_key_format(key, key_len, &ds); +odp_flow_format(key, key_len, mask, mask_len, &ds); if (stats) { ds_put_cstr(&ds, ", "); dpif_flow_stats_format(stats, &ds); @@ -1323,8 +1326,8 @@ log_flow_put_message(struct dpif *dpif, const struct dpif_flow_put *put, ds_put_cstr(&s, "[zero]"); } log_flow_message(dpif, error, ds_cstr(&s), - put->key, put->key_len, put->stats, - put->actions, put->actions_len); + put->key, put->key_len, put->mask, put->mask_len, + put->stats, put->actions, put->actions_len); ds_destroy(&s); } } @@ -1335,7 +1338,7 @@ log_flow_del_message(struct dpif *dpif, const struct dpif_flow_del *del, { if (should_log_flow_message(error)) { log_flow_message(dpif, error, "flow_del", del->key, del->key_len, - !error ? del->stats : NULL, NULL, 0); + NULL, 0, !error ? del->stats : NULL, NULL, 0); } } diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 7ab3e7f..8dbda93 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -4137,12 +4137,12 @@ update_stats(struct dpif_backer *backer) { const struct dpif_flow_stats *stats; struct dpif_flow_dump dump; -const struct nlattr *key; -size_t key_len; +const struct nlattr *key, *mask; +size_t key_len, mask_len; dpif_flow_dump_start(&dump, backer->dpif); while (dpif_flow_dump_next(&dump, &key, &key_len, - NULL, NULL, NULL, NULL, &stats)) { + &mask, &mask_len, NULL, NULL, &stats)) { struct subfacet *subfacet; uint32_t key_hash; -- 1.7.5.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
If a flow cannot be installed in the datapath, we should notice this and not treat it as installed. This becomes an issue with megaflows, since a batch of unique flows may come in that generate a single new datapath megaflow that covers them. Since userspace doesn't know whether the datapath supports megaflows, each unique flow will get a separate flow entry (which overlap when masks are applied) and all except the first will get rejected by a megaflow- supporting datapath as duplicates. Signed-off-by: Justin Pettit --- lib/dpif.c |7 ++- ofproto/ofproto-dpif.c | 24 ++-- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/lib/dpif.c b/lib/dpif.c index 4193dcb..bab1ffe 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -1261,7 +1261,12 @@ log_operation(const struct dpif *dpif, const char *operation, int error) static enum vlog_level flow_message_log_level(int error) { -return error ? VLL_WARN : VLL_DBG; +/* If flows arrive in a batch, userspace may push down multiple + * unique flow definitions that overlap when wildcards are applied. + * Kernels that support flow wildcarding will reject these flows as + * duplicates (EEXIST), so lower the log level to debug for these + * types of messages. */ +return (error && error != EEXIST) ? VLL_WARN : VLL_DBG; } static bool diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 6fa7894..7ab3e7f 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -69,6 +69,7 @@ COVERAGE_DEFINE(facet_changed_rule); COVERAGE_DEFINE(facet_revalidate); COVERAGE_DEFINE(facet_unexpected); COVERAGE_DEFINE(facet_suppress); +COVERAGE_DEFINE(subfacet_install_fail); struct flow_miss; struct facet; @@ -3245,6 +3246,10 @@ struct flow_miss_op { uint64_t slow_stub[128 / 8]; /* Buffer for compose_slow_path() */ struct xlate_out xout; bool xout_garbage; /* 'xout' needs to be uninitialized? */ + +/* If this is a "put" op, then a pointer to the subfacet that should + * be destroyed if the operation fails. */ +struct subfacet *subfacet; }; /* Sends an OFPT_PACKET_IN message for 'packet' of type OFPR_NO_MATCH to each @@ -3307,6 +3312,7 @@ init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet, eth_pop_vlan(packet); } +op->subfacet = NULL; op->xout_garbage = false; op->dpif_op.type = DPIF_OP_EXECUTE; op->dpif_op.u.execute.key = miss->key; @@ -3461,6 +3467,7 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet, op->xout_garbage = false; op->dpif_op.type = DPIF_OP_FLOW_PUT; +op->subfacet = subfacet; put->flags = DPIF_FP_CREATE | DPIF_FP_MODIFY; put->key = miss->key; put->key_len = miss->key_len; @@ -3771,8 +3778,19 @@ handle_miss_upcalls(struct dpif_backer *backer, struct dpif_upcall *upcalls, } dpif_operate(backer->dpif, dpif_ops, n_ops); -/* Free memory. */ for (i = 0; i < n_ops; i++) { +/* Destroy subfacets for flows that weren't installed. */ +if (dpif_ops[i]->error != 0 +&& flow_miss_ops[i].dpif_op.type == DPIF_OP_FLOW_PUT +&& flow_miss_ops[i].subfacet) { +struct subfacet *subfacet = flow_miss_ops[i].subfacet; + +COVERAGE_INC(subfacet_install_fail); + +subfacet->path = SF_NOT_INSTALLED; +} + +/* Free memory. */ if (flow_miss_ops[i].xout_garbage) { xlate_out_uninit(&flow_miss_ops[i].xout); } @@ -5037,7 +5055,9 @@ subfacet_install(struct subfacet *subfacet, const struct ofpbuf *odp_actions, subfacet_reset_dp_stats(subfacet, stats); } -if (!ret) { +if (ret) { +COVERAGE_INC(subfacet_install_fail); +} else { subfacet->path = path; } return ret; -- 1.7.5.4 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH 4/4] ofproto-dpif: Use megaflows.
The commit configures the masks generated from megaflows and pushes them through the dpif layer. With this commit and a wildcard supporting OVS kernel module, ovs-vswitchd's flow setup rate is very close to that of the Linux bridge. Signed-off-by: Justin Pettit --- NEWS |4 PORTING| 32 ofproto/ofproto-dpif.c | 23 +++ 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/NEWS b/NEWS index aae3378..7b43447 100644 --- a/NEWS +++ b/NEWS @@ -11,6 +11,10 @@ post-v1.11.0 v1.11.0 - xx xxx - +- Support for megaflows, which allows wildcarding in the kernel (and + any dpif implementation that supports wildcards). Depending on + the flow table and switch configuration, flow set up rates are + close to the Linux bridge. - The "tutorial" directory contains a new tutorial for some advanced Open vSwitch features. - Stable bond mode has been removed. diff --git a/PORTING b/PORTING index ffde296..53c19d3 100644 --- a/PORTING +++ b/PORTING @@ -148,9 +148,9 @@ depends on a few different factors: * A dpif provider is usually easier to implement, but most appropriate for software switching. It "explodes" wildcard - rules into exact-match entries. This allows fast hash lookups - in software, but makes inefficient use of TCAMs in hardware - that support wildcarding. + rules into exact-match entries (with an optional wildcard mask). + This allows fast hash lookups in software, but makes + inefficient use of TCAMs in hardware that support wildcarding. The following sections describe how to implement each kind of port. @@ -175,18 +175,26 @@ Writing a dpif Provider Open vSwitch has a built-in ofproto provider named "ofproto-dpif", which is built on top of a library for manipulating datapaths, called -"dpif". A "datapath" is a simple flow table, one that supports only -exact-match flows, that is, flows without wildcards. When a packet -arrives on a network device, the datapath looks for it in this -exact-match table. If there is a match, then it performs the -associated actions. If there is no match, the datapath passes the -packet up to ofproto-dpif, which maintains an OpenFlow flow table -(that supports wildcards). If the packet matches in this flow table, -then ofproto-dpif executes its actions and inserts a new exact-match -entry into the dpif flow table. (Otherwise, ofproto-dpif passes the +"dpif". A "datapath" is a simple flow table, one that is only required +to support exact-match flows, that is, flows without wildcards. When a +packet arrives on a network device, the datapath looks for it in this +table. If there is a match, then it performs the associated actions. +If there is no match, the datapath passes the packet up to ofproto-dpif, +which maintains the full OpenFlow flow table. If the packet matches in +this flow table, then ofproto-dpif executes its actions and inserts a +new entry into the dpif flow table. (Otherwise, ofproto-dpif passes the packet up to ofproto to send the packet to the OpenFlow controller, if one is configured.) +When calculating the dpif flow, ofproto-dpif generates an exact-match +flow that describes the missed packet. It makes an effort to figure out +what fields can be wildcarded based on the switch's configuration and +OpenFlow flow table. The dpif is free to ignore the suggested wildcards +and only support the exact-match entry. However, if the dpif supports +wildcarding, then it can use the masks to match multiple flows with +fewer entries and potentially significantly reduce the number of flow +misses handled by ofproto-dpif. + The "dpif" library in turn delegates much of its functionality to a "dpif provider". The following diagram shows how dpif providers fit into the Open vSwitch architecture: diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index 8dbda93..9eed040 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -3247,6 +3247,9 @@ struct flow_miss_op { struct xlate_out xout; bool xout_garbage; /* 'xout' needs to be uninitialized? */ +struct ofpbuf mask; /* Flow mask for "put" ops. */ +struct odputil_keybuf maskbuf; + /* If this is a "put" op, then a pointer to the subfacet that should * be destroyed if the operation fails. */ struct subfacet *subfacet; @@ -3318,6 +3321,7 @@ init_flow_miss_execute_op(struct flow_miss *miss, struct ofpbuf *packet, op->dpif_op.u.execute.key = miss->key; op->dpif_op.u.execute.key_len = miss->key_len; op->dpif_op.u.execute.packet = packet; +ofpbuf_use_stack(&op->mask, &op->maskbuf, sizeof op->maskbuf); } /* Helper for handle_flow_miss_without_facet() and @@ -3465,14 +3469,19 @@ handle_flow_miss_with_facet(struct flow_miss *miss, struct facet *facet, subfacet->path = want_path; +
Re: [ovs-dev] [PATCH 4/4] ofproto-dpif: Use megaflows.
On Jun 20, 2013, at 4:38 PM, Ben Pfaff wrote: > "sparse" says: > >../ofproto/ofproto-dpif.c:5063:42: warning: incorrect type in argument 4 > (different base types) >../ofproto/ofproto-dpif.c:5063:42:expected restricted odp_port_t > [usertype] odp_in_port >../ofproto/ofproto-dpif.c:5063:42:got unsigned int >../ofproto/ofproto-dpif.c:3474:45: warning: incorrect type in argument 4 > (different base types) >../ofproto/ofproto-dpif.c:3474:45:expected restricted odp_port_t > [usertype] odp_in_port >../ofproto/ofproto-dpif.c:3474:45:got unsigned int More fallout from Alex's port typing. I fixed the problem in the revised patch 1 of this series. > I don't think init_flow_miss_execute_op() needs to initialize op->mask > (though it doesn't hurt). Yeah, I did it out of an abundance of caution, in case someone uses it later. --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
On Jun 20, 2013, at 4:31 PM, Ben Pfaff wrote: > The other thing that worries me is that you mentioned that you saw > multiple installations of a single subfacet in a single batch. If so, > that worries me because it seems like this could happen: > >handle_flow_miss_with_facet() queues up subfacet sf for >installation into fast path, and sets sf->path to >SF_FAST_PATH. > >handle_flow_miss_with_facet() queues up subfacet sf for >installation into fast path, again, and sets sf->path to >SF_FAST_PATH. > > ... > >The first installation succeeds. OK. > >The second installation fails (EINVAL, EEXIST, whatever), so >we set sf->path back to SF_NOT_INSTALLED. But it was >installed by the first "put". > > But I don't understand how we'd get multiple installations to begin > with, since the first handle_flow_miss_with_facet() call would set > sf->path to SF_FAST_PATH and the second one would see that sf->path > was already SF_FAST_PATH and not try to install it again. Do you have > any idea what was going on there? I think that was just a problem I saw when I was using the xout cache to generate megaflows in userspace. I agree it shouldn't be a problem with the new method we're using. --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 1/4] odp-util: Introduce odp_flow_key_from_mask().
On Thu, Jun 20, 2013 at 04:51:53PM -0700, Justin Pettit wrote: > Add a new function for converting a mask into a set of > OVS_KEY_ATTR* attributes. This will be useful in a future commit. > > Signed-off-by: Justin Pettit Acked-by: Ben Pfaff ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
On Thu, Jun 20, 2013 at 04:51:54PM -0700, Justin Pettit wrote: > If a flow cannot be installed in the datapath, we should notice > this and not treat it as installed. This becomes an issue with > megaflows, since a batch of unique flows may come in that generate > a single new datapath megaflow that covers them. Since userspace > doesn't know whether the datapath supports megaflows, each unique > flow will get a separate flow entry (which overlap when masks are > applied) and all except the first will get rejected by a megaflow- > supporting datapath as duplicates. > > Signed-off-by: Justin Pettit The new comment in struct flow_miss_op talks about destroying subfacets, but actually we just mark them as not installed. Same for a new comment in handle_miss_upcalls(). Acked-by: Ben Pfaff ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/4] ofproto-dpif: Use megaflows.
On Thu, Jun 20, 2013 at 04:51:56PM -0700, Justin Pettit wrote: > The commit configures the masks generated from megaflows and pushes > them through the dpif layer. > > With this commit and a wildcard supporting OVS kernel module, > ovs-vswitchd's flow setup rate is very close to that of the Linux > bridge. > > Signed-off-by: Justin Pettit With this kind of simple commit for a big performance gain, we should have committed this months ago. Acked-by: Ben Pfaff ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [threads 02/11] ovs-thread: Add per-thread data support.
On Thu, Jun 20, 2013 at 07:49:18PM -0400, Ed Maste wrote: > On 20 June 2013 16:18, Ben Pfaff wrote: > > On Wed, Jun 19, 2013 at 01:17:03PM -0700, Ben Pfaff wrote: > >> POSIX defines a portable pthread_key_t API for per-thread data. GCC and > >> C11 have two different forms of per-thread data that are generally faster > >> than the POSIX API, where they are available. This commit adds a > >> macro-based wrapper, DEFINE_PER_THREAD_DATA, that takes advantage of the > >> GCC extension where it is available and falls back to the POSIX API > >> otherwise. (I'm not aware of any compilers that implement the C11 feature, > >> so this commit doesn't try to use it.) > > > > Ed Maste pointed out off-list that clang on FreeBSD supports > > _Thread_local. Here's a revised version of the patch that supports > > both _Thread_local and __thread. I've also updated the "reviews" > > branch. > > I changed the autoconf test like so: > > diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4 > index 3895346..eb91b1a 100644 > --- a/m4/openvswitch.m4 > +++ b/m4/openvswitch.m4 > @@ -402,7 +402,10 @@ AC_DEFUN([OVS_CHECK__THREAD_LOCAL], > [whether $CC supports _Thread_local], > [ovs_cv__Thread_local], > [AC_LINK_IFELSE( > -[AC_LANG_PROGRAM([static _Thread_local var;], [return var;])], > +[AC_LANG_PROGRAM( > + [#include > + static _Thread_local int var;], > + [return var;])], > [ovs_cv__Thread_local=yes], > [ovs_cv__Thread_local=no])]) > if test $ovs_cv__Thread_local = no; then Thanks. Can you confirm that the autoconf test needs #include but the actual program doesn't? It looks funny. Thanks, Ben. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 2/4] ofproto-dpif: Handle failed flow 'put's.
On Jun 20, 2013, at 5:04 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 04:51:54PM -0700, Justin Pettit wrote: >> If a flow cannot be installed in the datapath, we should notice >> this and not treat it as installed. This becomes an issue with >> megaflows, since a batch of unique flows may come in that generate >> a single new datapath megaflow that covers them. Since userspace >> doesn't know whether the datapath supports megaflows, each unique >> flow will get a separate flow entry (which overlap when masks are >> applied) and all except the first will get rejected by a megaflow- >> supporting datapath as duplicates. >> >> Signed-off-by: Justin Pettit > > The new comment in struct flow_miss_op talks about destroying > subfacets, but actually we just mark them as not installed. > > Same for a new comment in handle_miss_upcalls(). Whoops. Fixed. Thanks, --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH] datapath: Use a single attribute array for parsing values and masks.
When parsing flow Netlink messages we currently have arrays to hold the attribute pointers for both values and masks. This results in a large stack, which some compilers warn about. It's not actually necessary to have both arrays at the same time, so we can collapse this to a single array. Reported-by: Pravin B Shelar Signed-off-by: Jesse Gross --- datapath/flow.c | 13 ++--- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/datapath/flow.c b/datapath/flow.c index 79d5d49..3a7bc9b 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -1479,7 +1479,6 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match, const struct nlattr *mask) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *m[OVS_KEY_ATTR_MAX + 1]; const struct nlattr *encap; u64 key_attrs = 0; u64 mask_attrs = 0; @@ -1516,19 +1515,19 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match, return err; if (mask) { - err = parse_flow_mask_nlattrs(mask, m, &mask_attrs); + err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); if (err) return err; if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) && encap_valid) { __be16 eth_type = 0; - if (m[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(m[OVS_KEY_ATTR_ETHERTYPE]); + if (a[OVS_KEY_ATTR_ETHERTYPE]) + eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); if (eth_type == htons(0x)) { mask_attrs &= ~(1ULL << OVS_KEY_ATTR_ETHERTYPE); - encap = m[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, m, &mask_attrs); + encap = a[OVS_KEY_ATTR_ENCAP]; + err = parse_flow_mask_nlattrs(encap, a, &mask_attrs); } else err = -EINVAL; @@ -1536,7 +1535,7 @@ int ovs_match_from_nlattrs(struct sw_flow_match *match, return err; } - err = ovs_key_from_nlattrs(match, mask_attrs, m, true); + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); if (err) return err; } else { -- 1.8.1.2 ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [PATCH v6] gre: Restructure tunneling.
Following patch restructures ovs tunneling and gre vport implementation to make ovs tunneling more in sync with upstream kernel tunneling. Doing this tunneling code is simplified as most of protocol processing on send and recv is pushed to kernel tunneling. For external ovs module the code is moved to kernel compatibility code. Signed-off-by: Pravin B Shelar --- v5-v6: - Fix skb_checksum_help - remove extra dst refcnt. v4-v5: - Added context to erro msg. - remove cb restore from gso. - use standard skb_needs_linearize(). - clearer dst refcnt. - fixed gso ip-id - removed IS_ERR_VALUE v4-v3: - Fix coding style. - Fix __build_header() parameters. - Using looser check of skb->ip_summed at __skb_gso_segment. v2-v3: - Move rcv packet datapath processing to ip_tunnel code. - Use TUNNEL_* flags. - better check gre64 vport. - return skb if there is no gre port created. v1-v2: - Fix Modules path for ip_tunnels_core.c --- datapath/compat.h |6 + datapath/flow.c | 14 +- datapath/flow.h |9 +- datapath/linux/Modules.mk |6 + datapath/linux/compat/gre.c | 352 +++ datapath/linux/compat/gso.c | 146 ++ datapath/linux/compat/gso.h | 72 + datapath/linux/compat/include/linux/if_ether.h |4 + datapath/linux/compat/include/linux/netdevice.h |6 +- datapath/linux/compat/include/net/gre.h | 102 +++ datapath/linux/compat/include/net/ip_tunnels.h | 54 datapath/linux/compat/ip_tunnels_core.c | 112 +++ datapath/linux/compat/skbuff-openvswitch.c | 29 ++- datapath/tunnel.c | 14 +- datapath/tunnel.h | 11 +- datapath/vport-gre.c| 349 +-- datapath/vport-lisp.c |2 +- datapath/vport-vxlan.c |2 +- 18 files changed, 1044 insertions(+), 246 deletions(-) create mode 100644 datapath/linux/compat/gre.c create mode 100644 datapath/linux/compat/gso.c create mode 100644 datapath/linux/compat/gso.h create mode 100644 datapath/linux/compat/include/net/gre.h create mode 100644 datapath/linux/compat/include/net/ip_tunnels.h create mode 100644 datapath/linux/compat/ip_tunnels_core.c diff --git a/datapath/compat.h b/datapath/compat.h index c7fd225..a6a01d5 100644 --- a/datapath/compat.h +++ b/datapath/compat.h @@ -94,4 +94,10 @@ static inline void skb_set_mark(struct sk_buff *skb, u32 mark) } #endif /* after 2.6.20 */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36) +#define rt_dst(rt) (rt->dst) +#else +#define rt_dst(rt) (rt->u.dst) +#endif + #endif /* compat.h */ diff --git a/datapath/flow.c b/datapath/flow.c index 79d5d49..8d21806 100644 --- a/datapath/flow.c +++ b/datapath/flow.c @@ -1146,7 +1146,7 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr, struct nlattr *a; int rem; bool ttl = false; - u16 tun_flags = 0; + __be16 tun_flags = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -1168,7 +1168,7 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr, case OVS_TUNNEL_KEY_ATTR_ID: SW_FLOW_KEY_PUT(match, tun_key.tun_id, nla_get_be64(a), is_mask); - tun_flags |= OVS_TNL_F_KEY; + tun_flags |= TUNNEL_KEY; break; case OVS_TUNNEL_KEY_ATTR_IPV4_SRC: SW_FLOW_KEY_PUT(match, tun_key.ipv4_src, @@ -1188,10 +1188,10 @@ int ipv4_tun_from_nlattr(const struct nlattr *attr, ttl = true; break; case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: - tun_flags |= OVS_TNL_F_DONT_FRAGMENT; + tun_flags |= TUNNEL_DONT_FRAGMENT; break; case OVS_TUNNEL_KEY_ATTR_CSUM: - tun_flags |= OVS_TNL_F_CSUM; + tun_flags |= TUNNEL_CSUM; break; default: return -EINVAL; @@ -1222,7 +1222,7 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb, if (!nla) return -EMSGSIZE; - if (tun_key->tun_flags & OVS_TNL_F_KEY && + if (tun_key->tun_flags & TUNNEL_KEY && nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id)) return -EMSGSIZE; if (tun_key->ipv4_src && @@ -1235,10 +1235,10 @@ int ipv4_tun_to_nlattr(struct sk_buff *skb, return -EMSGSIZE; if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ipv4_ttl)) return -EMSGSIZE; - if ((tun_key->tun_flags & OVS_TNL_F_DONT_FRAGMENT) && + if ((tun_ke
Re: [ovs-dev] [PATCH] datapath: Use a single attribute array for parsing values and masks.
Looks good. Just curious, which compiler warn about this? What's the warning message? On Thu, Jun 20, 2013 at 5:15 PM, Jesse Gross wrote: > When parsing flow Netlink messages we currently have arrays to hold the > attribute pointers for both values and masks. This results in a large > stack, which some compilers warn about. It's not actually necessary > to have both arrays at the same time, so we can collapse this to a > single array. > > Reported-by: Pravin B Shelar > Signed-off-by: Jesse Gross > --- > datapath/flow.c | 13 ++--- > 1 file changed, 6 insertions(+), 7 deletions(-) > > diff --git a/datapath/flow.c b/datapath/flow.c > index 79d5d49..3a7bc9b 100644 > --- a/datapath/flow.c > +++ b/datapath/flow.c > @@ -1479,7 +1479,6 @@ int ovs_match_from_nlattrs(struct sw_flow_match > *match, >const struct nlattr *mask) > { > const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; > - const struct nlattr *m[OVS_KEY_ATTR_MAX + 1]; > const struct nlattr *encap; > u64 key_attrs = 0; > u64 mask_attrs = 0; > @@ -1516,19 +1515,19 @@ int ovs_match_from_nlattrs(struct sw_flow_match > *match, > return err; > > if (mask) { > - err = parse_flow_mask_nlattrs(mask, m, &mask_attrs); > + err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); > if (err) > return err; > > if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) && > encap_valid) { > __be16 eth_type = 0; > > - if (m[OVS_KEY_ATTR_ETHERTYPE]) > - eth_type = > nla_get_be16(m[OVS_KEY_ATTR_ETHERTYPE]); > + if (a[OVS_KEY_ATTR_ETHERTYPE]) > + eth_type = > nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); > if (eth_type == htons(0x)) { > mask_attrs &= ~(1ULL << > OVS_KEY_ATTR_ETHERTYPE); > - encap = m[OVS_KEY_ATTR_ENCAP]; > - err = parse_flow_mask_nlattrs(encap, m, > &mask_attrs); > + encap = a[OVS_KEY_ATTR_ENCAP]; > + err = parse_flow_mask_nlattrs(encap, a, > &mask_attrs); > } else > err = -EINVAL; > > @@ -1536,7 +1535,7 @@ int ovs_match_from_nlattrs(struct sw_flow_match > *match, > return err; > } > > - err = ovs_key_from_nlattrs(match, mask_attrs, m, true); > + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); > if (err) > return err; > } else { > -- > 1.8.1.2 > > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH] datapath: Use a single attribute array for parsing values and masks.
Thanks, I applied it. It was Pravin's compiler, not mine, so I'm not exactly sure but I believe that it is an older version of GCC. The message is that the stack size exceeds 1024 bytes (which is not automatically a problem but given that the kernel has a fixed stack size it can be dangerous if you have a few large frames). On Thu, Jun 20, 2013 at 5:19 PM, Andy Zhou wrote: > Looks good. Just curious, which compiler warn about this? What's the > warning message? > > > On Thu, Jun 20, 2013 at 5:15 PM, Jesse Gross wrote: >> >> When parsing flow Netlink messages we currently have arrays to hold the >> attribute pointers for both values and masks. This results in a large >> stack, which some compilers warn about. It's not actually necessary >> to have both arrays at the same time, so we can collapse this to a >> single array. >> >> Reported-by: Pravin B Shelar >> Signed-off-by: Jesse Gross >> --- >> datapath/flow.c | 13 ++--- >> 1 file changed, 6 insertions(+), 7 deletions(-) >> >> diff --git a/datapath/flow.c b/datapath/flow.c >> index 79d5d49..3a7bc9b 100644 >> --- a/datapath/flow.c >> +++ b/datapath/flow.c >> @@ -1479,7 +1479,6 @@ int ovs_match_from_nlattrs(struct sw_flow_match >> *match, >>const struct nlattr *mask) >> { >> const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; >> - const struct nlattr *m[OVS_KEY_ATTR_MAX + 1]; >> const struct nlattr *encap; >> u64 key_attrs = 0; >> u64 mask_attrs = 0; >> @@ -1516,19 +1515,19 @@ int ovs_match_from_nlattrs(struct sw_flow_match >> *match, >> return err; >> >> if (mask) { >> - err = parse_flow_mask_nlattrs(mask, m, &mask_attrs); >> + err = parse_flow_mask_nlattrs(mask, a, &mask_attrs); >> if (err) >> return err; >> >> if ((mask_attrs & 1ULL << OVS_KEY_ATTR_ENCAP) && >> encap_valid) { >> __be16 eth_type = 0; >> >> - if (m[OVS_KEY_ATTR_ETHERTYPE]) >> - eth_type = >> nla_get_be16(m[OVS_KEY_ATTR_ETHERTYPE]); >> + if (a[OVS_KEY_ATTR_ETHERTYPE]) >> + eth_type = >> nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); >> if (eth_type == htons(0x)) { >> mask_attrs &= ~(1ULL << >> OVS_KEY_ATTR_ETHERTYPE); >> - encap = m[OVS_KEY_ATTR_ENCAP]; >> - err = parse_flow_mask_nlattrs(encap, m, >> &mask_attrs); >> + encap = a[OVS_KEY_ATTR_ENCAP]; >> + err = parse_flow_mask_nlattrs(encap, a, >> &mask_attrs); >> } else >> err = -EINVAL; >> >> @@ -1536,7 +1535,7 @@ int ovs_match_from_nlattrs(struct sw_flow_match >> *match, >> return err; >> } >> >> - err = ovs_key_from_nlattrs(match, mask_attrs, m, true); >> + err = ovs_key_from_nlattrs(match, mask_attrs, a, true); >> if (err) >> return err; >> } else { >> -- >> 1.8.1.2 >> >> ___ >> dev mailing list >> dev@openvswitch.org >> http://openvswitch.org/mailman/listinfo/dev > > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v2.33] datapath: Add basic MPLS support to kernel
On Thu, Jun 20, 2013 at 12:28:32PM -0700, Jesse Gross wrote: > On Wed, Jun 19, 2013 at 5:56 PM, Simon Horman wrote: > > On Tue, Jun 18, 2013 at 04:06:49PM +0900, Simon Horman wrote: > >> Allow datapath to recognize and extract MPLS labels into flow keys > >> and execute actions which push, pop, and set labels on packets. > >> > >> Based heavily on work by Leo Alterman, Ravi K, Isaku Yamahata and Joe > >> Stringer. > >> > >> Cc: Ravi K > >> Cc: Leo Alterman > >> Cc: Isaku Yamahata > >> Cc: Joe Stringer > >> Signed-off-by: Simon Horman > >> > >> --- > >> > >> This patch depends on "gre: Restructure tunneling" which it aims > >> to be compatible with. > > > > To clarify. The dependency relates to a conflict when applying this patch > > which modifies datapath/linux/compat/gso.[ch], files that are > > created by "gre: Restructure tunneling". I believe it would > > be trivial to reverse the dependency so that this patch creates > > those files and "gre: Restructure tunneling" applies on top of it > > as the two patches add different functions to those files. > > > > As such I think it would be better to describe this patch > > as compatible with "gre: Restructure tunneling" rather than > > dependent on it. > > I agree that's the case. However, now that the OVS GRE code is > upstream, I think I can take just a quick pass over the GRE patch to > get it in and then we won't have any dependency issues. Thanks. Of course I am would be more than happy with that. ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v6] gre: Restructure tunneling.
On Thu, Jun 20, 2013 at 5:19 PM, Pravin B Shelar wrote: > diff --git a/datapath/linux/compat/skbuff-openvswitch.c > b/datapath/linux/compat/skbuff-openvswitch.c > index ef43ba9..720bc18 100644 > --- a/datapath/linux/compat/skbuff-openvswitch.c > +++ b/datapath/linux/compat/skbuff-openvswitch.c > +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) > +int skb_checksum_help(struct sk_buff *skb, int inward) > +#else > +int skb_checksum_help(struct sk_buff *skb) > +#endif > +{ > + /* Use default features for dst device. */ > + if (unlikely(skb_is_nonlinear(skb))) { > + int err; I think the above comment no longer applies since we're not pulling any device features. > diff --git a/datapath/vport-gre.c b/datapath/vport-gre.c > index dae35ac..fda6481 100644 > --- a/datapath/vport-gre.c > +++ b/datapath/vport-gre.c > +static int __send(struct vport *vport, struct sk_buff *skb, > + int tunnel_hlen, > + __be32 seq, __be16 gre64_flag) [...] > - ovs_tnl_rcv(vport, skb, &tun_key); > - return 0; > + /* Push Tunnel header. */ > + skb = __build_header(skb, tunnel_hlen, seq, gre64_flag); > + if (unlikely(!skb)) { > + err = 0; > + goto err_free_rt; > + } > > + df = OVS_CB(skb)->tun_key->tun_flags & TUNNEL_DONT_FRAGMENT ? > + htons(IP_DF) : 0; > + > + /* > +* Allow our local IP stack to fragment the outer packet even > +* if the DF bit is set as a last resort. We also need to > +* force selection of an IP ID here because Linux will > +* otherwise leave it at 0 if the packet originally had DF set. > +*/ I believe this comment was deleted/moved in the upstream version. Those are both minor though so Acked-by: Jesse Gross ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH 4/4] ofproto-dpif: Use megaflows.
On Jun 20, 2013, at 5:08 PM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 04:51:56PM -0700, Justin Pettit wrote: >> The commit configures the masks generated from megaflows and pushes >> them through the dpif layer. >> >> With this commit and a wildcard supporting OVS kernel module, >> ovs-vswitchd's flow setup rate is very close to that of the Linux >> bridge. >> >> Signed-off-by: Justin Pettit > > With this kind of simple commit for a big performance gain, we should > have committed this months ago. > > Acked-by: Ben Pfaff Woo-hoo! Pushed to master and branch-1.11. Thanks for all the reviews! --Justin ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
Re: [ovs-dev] [PATCH v13 1/4] ofproto-dpif: Add 'force-miss-model' configuration
Sure. Is there a preference for where the string parsing code goes -- ofproto.c or bridge.c? The configuration applies globally, but it seems that similar configuration is parsed in the bridge code. On Fri, Jun 21, 2013 at 12:57 AM, Ben Pfaff wrote: > On Thu, Jun 20, 2013 at 09:47:38AM +0900, Simon Horman wrote: > > This adds support for specifying flow miss handling behaviour at > > runtime, through a new "other-config" option in the Open_vSwitch table. > > This is an extension to flow-eviction-threshold. > > > > By default, the behaviour is the same as before. If force-miss-model is > > set to 1, then flow miss handling will always result in the creation of > > new facets and flow-eviction-threshold will be ignored. If > > force-miss-model is set to 2, then flow miss handling will never result > > in the creation of new facets (effectively the same as setting the > > flow-eviction-threshold to 0, which is not currently configurable). > > In the database, please use some meaningful names for these modes, > instead of integers. > > Thanks, > > Ben. > ___ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev > ___ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev
[ovs-dev] [mirror 2/2] ofproto-dpif: Modularize mirror code.
This code modularizes ofproto-dpif's mirror code by moving it to ofproto-dpif-mirror. Not only does this shorten ofproto-dpif and hide complexity, but its also necessary for future patches which modularize ofproto-dpif-xlate in preparation for multi-threading. Signed-off-by: Ethan Jackson --- ofproto/automake.mk |2 + ofproto/ofproto-dpif-mirror.c | 539 + ofproto/ofproto-dpif-mirror.h | 52 ofproto/ofproto-dpif-xlate.c | 55 +++-- ofproto/ofproto-dpif-xlate.h |1 + ofproto/ofproto-dpif.c| 331 +++-- ofproto/ofproto-dpif.h| 40 --- 7 files changed, 658 insertions(+), 362 deletions(-) create mode 100644 ofproto/ofproto-dpif-mirror.c create mode 100644 ofproto/ofproto-dpif-mirror.h diff --git a/ofproto/automake.mk b/ofproto/automake.mk index b4d0876..af9a12a 100644 --- a/ofproto/automake.mk +++ b/ofproto/automake.mk @@ -26,6 +26,8 @@ ofproto_libofproto_a_SOURCES = \ ofproto/ofproto-dpif-governor.h \ ofproto/ofproto-dpif-ipfix.c \ ofproto/ofproto-dpif-ipfix.h \ + ofproto/ofproto-dpif-mirror.c \ + ofproto/ofproto-dpif-mirror.h \ ofproto/ofproto-dpif-sflow.c \ ofproto/ofproto-dpif-sflow.h \ ofproto/ofproto-dpif-xlate.c \ diff --git a/ofproto/ofproto-dpif-mirror.c b/ofproto/ofproto-dpif-mirror.c new file mode 100644 index 000..a10bfcd --- /dev/null +++ b/ofproto/ofproto-dpif-mirror.c @@ -0,0 +1,539 @@ +/* Copyright (c) 2009, 2010, 2011, 2012, 2013 Nicira, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ + +#include + +#include "ofproto-dpif-mirror.h" + +#include + +#include "hmap.h" +#include "hmapx.h" +#include "ofproto.h" +#include "vlan-bitmap.h" +#include "vlog.h" + +VLOG_DEFINE_THIS_MODULE(ofproto_dpif_mirror); + +#define MIRROR_MASK_C(X) UINT32_C(X) +BUILD_ASSERT_DECL(sizeof(mirror_mask_t) * CHAR_BIT >= MAX_MIRRORS); + +struct mbridge { +struct hmap_node hmap_node; +struct ofproto_dpif *ofproto; + +struct mirror *mirrors[MAX_MIRRORS]; +struct hmap mbundles; + +bool need_revalidate; +bool has_mirrors; +}; + +struct mbundle { +struct hmap_node hmap_node; /* In parent 'mbridge' map. */ +struct ofbundle *ofbundle; + +mirror_mask_t src_mirrors; /* Mirrors triggered when packet received. */ +mirror_mask_t dst_mirrors; /* Mirrors triggered when packet sent. */ +mirror_mask_t mirror_out; /* Mirrors that output to this mbundle. */ +}; + +struct mirror { +struct mbridge *mbridge;/* Owning ofproto. */ +size_t idx; /* In ofproto's "mirrors" array. */ +void *aux; /* Key supplied by ofproto's client. */ + +/* Selection criteria. */ +struct hmapx srcs; /* Contains "struct mbundle*"s. */ +struct hmapx dsts; /* Contains "struct mbundle*"s. */ +unsigned long *vlans; /* Bitmap of chosen VLANs, NULL selects all. */ + +/* Output (exactly one of out == NULL and out_vlan == -1 is true). */ +struct mbundle *out;/* Output port or NULL. */ +int out_vlan; /* Output VLAN or -1. */ +mirror_mask_t dup_mirrors; /* Bitmap of mirrors with the same output. */ + +/* Counters. */ +int64_t packet_count; /* Number of packets sent. */ +int64_t byte_count; /* Number of bytes sent. */ +}; + +static struct hmap mbridges = HMAP_INITIALIZER(&mbridges); + +static struct mbridge *mbridge_lookup(const struct ofproto_dpif *); +static struct mirror *mirror_lookup(struct mbridge *, void *aux); +static struct mbundle *mbundle_lookup(const struct mbridge *, + struct ofbundle *); +static struct mbundle *mbundle_lookup_ofproto(const struct ofproto_dpif *, + struct ofbundle *); +static void mbundle_lookup_multiple(const struct mbridge *, struct ofbundle **, + size_t n_bundles, struct hmapx *mbundles); +static int mirror_scan(struct mbridge *); +static void mirror_update_dups(struct mbridge *); +static int mirror_mask_ffs(mirror_mask_t); + +void +mirror_register_ofproto(struct ofproto_dpif *ofproto) +{ +struct mbridge *mbridge; + +mbridge = xzalloc(sizeof *mbridge); +mbridge->ofproto = ofproto; + +hmap_init(&mbridge->mbundles); +hmap_insert(&mbridges, &mbridge->hmap_node, hash_pointer(ofproto, 0)); +} + +void +mirror_unregister_ofpr
[ovs-dev] [mirror 1/2] ofproto-dpif: Handle dest mirrors in compose_output_action().
Before this patch, the mirroring code would retroactively insert actions for destination mirrors after actions were translated. This relied on converting datapath output actions into ofports which doesn't work for tunnels and patch ports. This patch refactors the code to handle destination mirrors at output. Signed-off-by: Ethan Jackson --- ofproto/ofproto-dpif-xlate.c | 51 +--- tests/ofproto-dpif.at| 66 ++ 2 files changed, 61 insertions(+), 56 deletions(-) diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c index a52a8cf..6512866 100644 --- a/ofproto/ofproto-dpif-xlate.c +++ b/ofproto/ofproto-dpif-xlate.c @@ -58,6 +58,7 @@ struct xlate_ctx { /* Flow at the last commit. */ struct flow base_flow; +struct flow orig_flow; /* Junk unless 'hit_resubmit_limit' or mirroring. */ /* Tunnel IP destination address as received. This is stored separately * as the base_flow.tunnel is cleared on init to reflect the datapath @@ -173,22 +174,22 @@ lookup_input_bundle(const struct ofproto_dpif *ofproto, ofp_port_t in_port, } static void -add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow) +add_mirror_actions(struct xlate_ctx *ctx, mirror_mask_t mirrors) { struct ofproto_dpif *ofproto = ctx->ofproto; -mirror_mask_t mirrors; struct ofbundle *in_bundle; +struct flow xin_flow; uint16_t vlan; uint16_t vid; -const struct nlattr *a; -size_t left; -in_bundle = lookup_input_bundle(ctx->ofproto, orig_flow->in_port.ofp_port, +in_bundle = lookup_input_bundle(ctx->ofproto, +ctx->orig_flow.in_port.ofp_port, ctx->xin->packet != NULL, NULL); if (!in_bundle) { return; } -mirrors = in_bundle->src_mirrors; +mirrors |= in_bundle->src_mirrors; +mirrors &= ~ctx->xout->mirrors; /* Drop frames on bundles reserved for mirroring. */ if (in_bundle->mirror_out) { @@ -202,35 +203,18 @@ add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow) } /* Check VLAN. */ -vid = vlan_tci_to_vid(orig_flow->vlan_tci); +vid = vlan_tci_to_vid(ctx->orig_flow.vlan_tci); if (!input_vid_is_valid(vid, in_bundle, ctx->xin->packet != NULL)) { return; } vlan = input_vid_to_vlan(in_bundle, vid); -/* Look at the output ports to check for destination selections. */ - -NL_ATTR_FOR_EACH (a, left, ctx->xout->odp_actions.data, - ctx->xout->odp_actions.size) { -enum ovs_action_attr type = nl_attr_type(a); -struct ofport_dpif *ofport; - -if (type != OVS_ACTION_ATTR_OUTPUT) { -continue; -} - -ofport = get_odp_port(ofproto, nl_attr_get_odp_port(a)); -if (ofport && ofport->bundle) { -mirrors |= ofport->bundle->dst_mirrors; -} -} - if (!mirrors) { return; } -/* Restore the original packet before adding the mirror actions. */ -ctx->xin->flow = *orig_flow; +xin_flow = ctx->xin->flow; +ctx->xin->flow = ctx->orig_flow; while (mirrors) { struct ofmirror *m; @@ -251,7 +235,7 @@ add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow) if (m->out) { output_normal(ctx, m->out, vlan); } else if (vlan != m->out_vlan - && !eth_addr_is_reserved(orig_flow->dl_dst)) { + && !eth_addr_is_reserved(ctx->orig_flow.dl_dst)) { struct ofbundle *bundle; HMAP_FOR_EACH (bundle, hmap_node, &ofproto->bundles) { @@ -262,6 +246,7 @@ add_mirror_actions(struct xlate_ctx *ctx, const struct flow *orig_flow) } } } +ctx->xin->flow = xin_flow; } /* Given 'vid', the VID obtained from the 802.1Q header that was received as @@ -856,6 +841,10 @@ compose_output_action__(struct xlate_ctx *ctx, ofp_port_t ofp_port, return; } +if (ctx->ofproto->has_mirrors && ofport->bundle) { +add_mirror_actions(ctx, ofport->bundle->dst_mirrors); +} + if (ofport->peer) { struct ofport_dpif *peer = ofport->peer; struct flow old_flow = ctx->xin->flow; @@ -1864,7 +1853,6 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) enum slow_path_reason special; const struct ofpact *ofpacts; struct ofport_dpif *in_port; -struct flow orig_flow; struct xlate_ctx ctx; size_t ofpacts_len; @@ -1946,7 +1934,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out *xout) if (ctx.ofproto->has_mirrors || hit_resubmit_limit) { /* Do this conditionally because the copy is expensive enough that it * shows up in profiles. */ -orig_flow = *flow; +ctx.orig_flow = *flow; } if (flow->nw_frag & FLOW_NW_FRAG_ANY) { @@ -2009,7 +1997,8 @@ xlat