[ovs-dev] [patch net-next 03/13] net: introduce generic switch devices support

2014-09-03 Thread Jiri Pirko
The goal of this is to provide a possibility to support various switch
chips. Drivers should implement relevant ndos to do so. Now there are a
couple of ndos defined:
- for getting the physical switch id.
- for working with flows.

Note that user can use random port netdevice to access the switch.

Signed-off-by: Jiri Pirko 
---
 Documentation/networking/switchdev.txt |  53 ++
 MAINTAINERS|   7 ++
 include/linux/netdevice.h  |  28 ++
 include/net/sw_flow.h  |  14 +++
 include/net/switchdev.h|  44 +
 net/Kconfig|   1 +
 net/Makefile   |   3 +
 net/switchdev/Kconfig  |   9 ++
 net/switchdev/Makefile |   5 +
 net/switchdev/switchdev.c  | 172 +
 10 files changed, 336 insertions(+)
 create mode 100644 Documentation/networking/switchdev.txt
 create mode 100644 include/net/switchdev.h
 create mode 100644 net/switchdev/Kconfig
 create mode 100644 net/switchdev/Makefile
 create mode 100644 net/switchdev/switchdev.c

diff --git a/Documentation/networking/switchdev.txt 
b/Documentation/networking/switchdev.txt
new file mode 100644
index 000..435746a
--- /dev/null
+++ b/Documentation/networking/switchdev.txt
@@ -0,0 +1,53 @@
+Switch device drivers HOWTO
+===
+
+First let's describe the topology a bit. Imagine the following example:
+
+   +++---+
+   | SOME switch chip   ||  CPU  |
+   +++---+
+   port1 port2 port3 port4 MNGMNT| PCI-E |
+ | | | | |   +---+
+PHY   PHY| | | |  NIC0 NIC1
+ | | | |   ||
+ | | +- PCI-E -+   ||
+ | +--- MII ---+|
+ +- MII +
+
+In this example, there are two independent lines between the switch silicon
+and CPU. NIC0 and NIC1 drivers are not aware of a switch presence. They are
+separate from the switch driver. SOME switch chip is managed by a driver
+via PCI-E device MNGMNT. Note that MNGMNT device, NIC0 and NIC1 may be
+connected to some other type of bus.
+
+Now, for the previous example, here is the representation in the kernel:
+
+   +++---+
+   | SOME switch chip   ||  CPU  |
+   +++---+
+   sw0p0 sw0p1 sw0p2 sw0p3 MNGMNT| PCI-E |
+ | | | | |   +---+
+PHY   PHY| | | |  eth0 eth1
+ | | | |   ||
+ | | +- PCI-E -+   ||
+ | +--- MII ---+|
+ +- MII +
+
+Let's call the example switch driver for SOME switch chip "SOMEswitch". This
+driver takes care of PCI-E device MNGMNT. There is a netdevice instance sw0pX
+created for each port of a switch. These netdevices are instances
+of "SOMEswitch" driver. sw0pX netdevices serve as a "representation"
+of the switch chip. eth0 and eth1 are instances of some other existing driver.
+
+The only difference of the switch-port netdevice from the ordinary netdevice
+is that it implements a couple more NDOs:
+
+   ndo_swdev_get_id - This returns the same ID for two port netdevices of
+  the same physical switch chip. This is mandatory to
+  be implemented by all switch drivers and serves
+  the caller for recognition of a port netdevice.
+   ndo_swdev_* - Functions that serve for a manipulation of the switch chip
+ itself. They are not port-specific. Caller might use
+ arbitrary port netdevice of the same switch and it will
+ make no difference.
+   ndo_swportdev_* - Functions that serve for a port-specific manipulation.
diff --git a/MAINTAINERS b/MAINTAINERS
index c9b4b55..4baaf44 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8808,6 +8808,13 @@ F:   lib/swiotlb.c
 F: arch/*/kernel/pci-swiotlb.c
 F: include/linux/swiotlb.h
 
+SWITCHDEV
+M: Jiri Pirko 
+L: net...@vger.kernel.org
+S: Supported
+F: net/switchdev/
+F: include/net/switchdev.h
+
 SYNOPSYS ARC ARCHITECTURE
 M: Vineet Gupta 
 S: Supported
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9faeea6..6a009d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -46,9 +46,11 @@
 #include 
 #endif
 #include 
+#include 
 
 #include 
 #include 
+
 #include 
 
 struct netpoll_info;
@@ -997,6 +999,24 @@ typedef u16 (*select_queue_fallback_t)(struct net_d

[ovs-dev] [patch net-next 10/13] openvswitch: add support for datapath hardware offload

2014-09-03 Thread Jiri Pirko
Benefit from the possibility to work with flows in switch devices and
use the swdev api to offload flow datapath.

Signed-off-by: Jiri Pirko 
---
 net/openvswitch/Makefile   |   3 +-
 net/openvswitch/datapath.c |  33 ++
 net/openvswitch/datapath.h |   3 +
 net/openvswitch/flow_table.c   |   1 +
 net/openvswitch/hw_offload.c   | 245 +
 net/openvswitch/hw_offload.h   |  22 
 net/openvswitch/vport-netdev.c |   3 +
 net/openvswitch/vport.h|   2 +
 8 files changed, 311 insertions(+), 1 deletion(-)
 create mode 100644 net/openvswitch/hw_offload.c
 create mode 100644 net/openvswitch/hw_offload.h

diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 3591cb5..5152437 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -13,7 +13,8 @@ openvswitch-y := \
flow_table.o \
vport.o \
vport-internal_dev.o \
-   vport-netdev.o
+   vport-netdev.o \
+   hw_offload.o
 
 ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
 openvswitch-y += vport-vxlan.o
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 75bb07f..3e43e1d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -57,6 +57,7 @@
 #include "flow_netlink.h"
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
+#include "hw_offload.h"
 
 int ovs_net_id __read_mostly;
 
@@ -864,6 +865,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
acts = NULL;
goto err_unlock_ovs;
}
+   error = ovs_hw_flow_insert(dp, new_flow);
+   if (error)
+   pr_warn("failed to insert flow into hw\n");
 
if (unlikely(reply)) {
error = ovs_flow_cmd_fill_info(new_flow,
@@ -896,10 +900,18 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
goto err_unlock_ovs;
}
}
+   error = ovs_hw_flow_remove(dp, flow);
+   if (error)
+   pr_warn("failed to remove flow from hw\n");
+
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
 
+   error = ovs_hw_flow_insert(dp, flow);
+   if (error)
+   pr_warn("failed to insert flow into hw\n");
+
if (unlikely(reply)) {
error = ovs_flow_cmd_fill_info(flow,
   ovs_header->dp_ifindex,
@@ -993,9 +1005,17 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct 
genl_info *info)
 
/* Update actions, if present. */
if (likely(acts)) {
+   error = ovs_hw_flow_remove(dp, flow);
+   if (error)
+   pr_warn("failed to remove flow from hw\n");
+
old_acts = ovsl_dereference(flow->sf_acts);
rcu_assign_pointer(flow->sf_acts, acts);
 
+   error = ovs_hw_flow_insert(dp, flow);
+   if (error)
+   pr_warn("failed to insert flow into hw\n");
+
if (unlikely(reply)) {
error = ovs_flow_cmd_fill_info(flow,
   ovs_header->dp_ifindex,
@@ -1109,6 +1129,9 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct 
genl_info *info)
}
 
if (unlikely(!a[OVS_FLOW_ATTR_KEY])) {
+   err = ovs_hw_flow_flush(dp);
+   if (err)
+   pr_warn("failed to flush flows from hw\n");
err = ovs_flow_tbl_flush(&dp->table);
goto unlock;
}
@@ -1120,6 +1143,9 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct 
genl_info *info)
}
 
ovs_flow_tbl_remove(&dp->table, flow);
+   err = ovs_hw_flow_remove(dp, flow);
+   if (err)
+   pr_warn("failed to remove flow from hw\n");
ovs_unlock();
 
reply = ovs_flow_cmd_alloc_info((const struct ovs_flow_actions __force 
*) flow->sf_acts,
@@ -1368,6 +1394,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
INIT_HLIST_HEAD(&dp->ports[i]);
 
+   INIT_LIST_HEAD(&dp->swdev_rep_list);
+
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
parms.type = OVS_VPORT_TYPE_INTERNAL;
@@ -1431,6 +1459,7 @@ err:
 static void __dp_destroy(struct datapath *dp)
 {
int i;
+   int err;
 
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
@@ -1448,6 +1477,10 @@ static void __dp_destroy(struct datapath *dp)
 */
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
 
+   err = ovs_hw_flow_flush(d

[ovs-dev] [patch net-next 05/13] net-sysfs: expose physical switch id for particular device

2014-09-03 Thread Jiri Pirko
Signed-off-by: Jiri Pirko 
---
 net/core/net-sysfs.c | 24 
 1 file changed, 24 insertions(+)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 55dc4da..51cd5ab 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -399,6 +400,28 @@ static ssize_t phys_port_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_port_id);
 
+static ssize_t phys_switch_id_show(struct device *dev,
+  struct device_attribute *attr, char *buf)
+{
+   struct net_device *netdev = to_net_dev(dev);
+   ssize_t ret = -EINVAL;
+
+   if (!rtnl_trylock())
+   return restart_syscall();
+
+   if (dev_isalive(netdev)) {
+   struct netdev_phys_item_id ppid;
+
+   ret = swdev_get_id(netdev, &ppid);
+   if (!ret)
+   ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id);
+   }
+   rtnl_unlock();
+
+   return ret;
+}
+static DEVICE_ATTR_RO(phys_switch_id);
+
 static struct attribute *net_class_attrs[] = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
@@ -423,6 +446,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_flags.attr,
&dev_attr_tx_queue_len.attr,
&dev_attr_phys_port_id.attr,
+   &dev_attr_phys_switch_id.attr,
NULL,
 };
 ATTRIBUTE_GROUPS(net_class);
-- 
1.9.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [patch net-next 00/13] introduce rocker switch driver with openvswitch hardware accelerated datapath

2014-09-03 Thread Jiri Pirko
This patchset can be divided into 3 main sections:
- introduce switchdev api for implementing switch drivers
- add hardware acceleration bits into openvswitch datapath. This uses
  previously mentioned switchdev api
- introduce rocker switch driver which implements switchdev api

More info in separate patches.

So now it is possible out of the box to create an ovs bridge over rocker
switch ports and the flows will be offloaded into hardware.

RFC->v1 changes:
- moved include/linux/*.h -> include/net/
- moved net/core/switchdev.c -> net/switchdev/
- moved drivers/net/rocker.* -> drivers/net/ethernet/rocker/
- fixed couple of little bugs and typos
- in dsa the switch id is generated randomly
- fixed rocker schedule in atomic context bug in rocker_port_set_rx_mode 
- added switchdev Netlink API

Jiri Pirko (13):
  openvswitch: split flow structures into ovs specific and generic ones
  net: rename netdev_phys_port_id to more generic name
  net: introduce generic switch devices support
  rtnl: expose physical switch id for particular device
  net-sysfs: expose physical switch id for particular device
  net: introduce dummy switch
  dsa: implement ndo_swdev_get_id
  net: introduce netdev_phys_item_ids_match helper
  openvswitch: introduce vport_op get_netdev
  openvswitch: add support for datapath hardware offload
  sw_flow: add misc section to key with in_port_ifindex field
  rocker: introduce rocker switch driver
  switchdev: introduce Netlink API

 Documentation/networking/switchdev.txt   |   53 +
 MAINTAINERS  |   14 +
 drivers/net/Kconfig  |7 +
 drivers/net/Makefile |1 +
 drivers/net/dummyswitch.c|  130 +
 drivers/net/ethernet/Kconfig |1 +
 drivers/net/ethernet/Makefile|1 +
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c |2 +-
 drivers/net/ethernet/intel/i40e/i40e_main.c  |2 +-
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c   |2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |2 +-
 drivers/net/ethernet/rocker/Kconfig  |   29 +
 drivers/net/ethernet/rocker/Makefile |5 +
 drivers/net/ethernet/rocker/rocker.c | 3553 ++
 drivers/net/ethernet/rocker/rocker.h |  465 +++
 include/linux/netdevice.h|   54 +-
 include/net/dsa.h|1 +
 include/net/sw_flow.h|  116 +
 include/net/switchdev.h  |   44 +
 include/uapi/linux/if_link.h |   10 +
 include/uapi/linux/switchdev.h   |  119 +
 net/Kconfig  |1 +
 net/Makefile |3 +
 net/core/dev.c   |2 +-
 net/core/net-sysfs.c |   26 +-
 net/core/rtnetlink.c |   30 +-
 net/dsa/Kconfig  |2 +-
 net/dsa/dsa.c|3 +
 net/dsa/slave.c  |   10 +
 net/openvswitch/Makefile |3 +-
 net/openvswitch/actions.c|3 +-
 net/openvswitch/datapath.c   |  109 +-
 net/openvswitch/datapath.h   |7 +-
 net/openvswitch/dp_notify.c  |7 +-
 net/openvswitch/flow.c   |6 +-
 net/openvswitch/flow.h   |  102 +-
 net/openvswitch/flow_netlink.c   |   53 +-
 net/openvswitch/flow_netlink.h   |   10 +-
 net/openvswitch/flow_table.c |  119 +-
 net/openvswitch/flow_table.h |   30 +-
 net/openvswitch/hw_offload.c |  267 ++
 net/openvswitch/hw_offload.h |   22 +
 net/openvswitch/vport-gre.c  |4 +-
 net/openvswitch/vport-internal_dev.c |   56 +-
 net/openvswitch/vport-netdev.c   |   19 +
 net/openvswitch/vport-netdev.h   |   12 -
 net/openvswitch/vport-vxlan.c|2 +-
 net/openvswitch/vport.c  |2 +-
 net/openvswitch/vport.h  |6 +-
 net/switchdev/Kconfig|   20 +
 net/switchdev/Makefile   |6 +
 net/switchdev/switchdev.c|  174 ++
 net/switchdev/switchdev_netlink.c|  493 +++
 53 files changed, 5931 insertions(+), 289 deletions(-)
 create mode 100644 Documentation/networking/switchdev.txt
 create mode 100644 drivers/net/dummyswitch.c
 create mode 100644 drivers/net/ethernet/rocker/Kconfig
 create mode 100644 drivers/net/ethernet/rocker/Makefile
 create mode 100644 drivers/net/ethernet/rocker/rocker.c
 create mode 100644 

[ovs-dev] [patch net-next 06/13] net: introduce dummy switch

2014-09-03 Thread Jiri Pirko
Dummy switch implementation using switchdev interface

Signed-off-by: Jiri Pirko 
---
 drivers/net/Kconfig  |   7 +++
 drivers/net/Makefile |   1 +
 drivers/net/dummyswitch.c| 130 +++
 include/uapi/linux/if_link.h |   9 +++
 4 files changed, 147 insertions(+)
 create mode 100644 drivers/net/dummyswitch.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c6f6f69..7822c74 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -71,6 +71,13 @@ config DUMMY
  To compile this driver as a module, choose M here: the module
  will be called dummy.
 
+config NET_DUMMY_SWITCH
+   tristate "Dummy switch net driver support"
+   depends on NET_SWITCHDEV
+   ---help---
+ To compile this driver as a module, choose M here: the module
+ will be called dummyswitch.
+
 config EQUALIZER
tristate "EQL (serial line load balancing) support"
---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 61aefdd..3c835ba 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -7,6 +7,7 @@
 #
 obj-$(CONFIG_BONDING) += bonding/
 obj-$(CONFIG_DUMMY) += dummy.o
+obj-$(CONFIG_NET_DUMMY_SWITCH) += dummyswitch.o
 obj-$(CONFIG_EQUALIZER) += eql.o
 obj-$(CONFIG_IFB) += ifb.o
 obj-$(CONFIG_MACVLAN) += macvlan.o
diff --git a/drivers/net/dummyswitch.c b/drivers/net/dummyswitch.c
new file mode 100644
index 000..7e1a54c
--- /dev/null
+++ b/drivers/net/dummyswitch.c
@@ -0,0 +1,130 @@
+/*
+ * drivers/net/dummyswitch.c - Dummy switch device
+ * Copyright (c) 2014 Jiri Pirko 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+struct dummyswport_priv {
+   struct netdev_phys_item_id psid;
+};
+
+static netdev_tx_t dummyswport_start_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+   dev_kfree_skb(skb);
+   return NETDEV_TX_OK;
+}
+
+static int dummyswport_swdev_get_id(struct net_device *dev,
+   struct netdev_phys_item_id *psid)
+{
+   struct dummyswport_priv *dsp = netdev_priv(dev);
+
+   memcpy(psid, &dsp->psid, sizeof(*psid));
+   return 0;
+}
+
+static int dummyswport_change_carrier(struct net_device *dev, bool new_carrier)
+{
+   if (new_carrier)
+   netif_carrier_on(dev);
+   else
+   netif_carrier_off(dev);
+   return 0;
+}
+
+static const struct net_device_ops dummyswport_netdev_ops = {
+   .ndo_start_xmit = dummyswport_start_xmit,
+   .ndo_swdev_get_id   = dummyswport_swdev_get_id,
+   .ndo_change_carrier = dummyswport_change_carrier,
+};
+
+static void dummyswport_setup(struct net_device *dev)
+{
+   ether_setup(dev);
+
+   /* Initialize the device structure. */
+   dev->netdev_ops = &dummyswport_netdev_ops;
+   dev->destructor = free_netdev;
+
+   /* Fill in device structure with ethernet-generic values. */
+   dev->tx_queue_len = 0;
+   dev->flags |= IFF_NOARP;
+   dev->flags &= ~IFF_MULTICAST;
+   dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+   dev->features   |= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO;
+   dev->features   |= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
+   eth_hw_addr_random(dev);
+}
+
+static int dummyswport_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+   if (tb[IFLA_ADDRESS])
+   return -EINVAL;
+   if (!data || !data[IFLA_DUMMYSWPORT_PHYS_SWITCH_ID])
+   return -EINVAL;
+   return 0;
+}
+
+static int dummyswport_newlink(struct net *src_net, struct net_device *dev,
+  struct nlattr *tb[], struct nlattr *data[])
+{
+   struct dummyswport_priv *dsp = netdev_priv(dev);
+   int err;
+
+   dsp->psid.id_len = nla_len(data[IFLA_DUMMYSWPORT_PHYS_SWITCH_ID]);
+   memcpy(dsp->psid.id, nla_data(data[IFLA_DUMMYSWPORT_PHYS_SWITCH_ID]),
+  dsp->psid.id_len);
+
+   err = register_netdevice(dev);
+   if (err)
+   return err;
+
+   netif_carrier_on(dev);
+
+   return 0;
+}
+
+static const struct nla_policy dummyswport_policy[IFLA_DUMMYSWPORT_MAX + 1] = {
+   [IFLA_DUMMYSWPORT_PHYS_SWITCH_ID] = { .type = NLA_BINARY,
+ .len = MAX_PHYS_ITEM_ID_LEN },
+};
+
+static struct rtnl_link_ops dummyswport_link_ops __read_mostly = {
+   .kind   = "dummyswport",
+   .priv_size  = sizeof(struct dummyswport_priv),
+   .setup  = dummyswport_setup,
+   .validate   = dummyswport_validate,
+   .newlink= dummyswport_newlink,
+   .policy = dummyswport_policy,
+ 

[ovs-dev] [patch net-next 04/13] rtnl: expose physical switch id for particular device

2014-09-03 Thread Jiri Pirko
If the netdevice represents a port in a switch, it will expose
IFLA_PHYS_SWITCH_ID value via rtnl. Two netdevices with the same value
belong to one physical switch.

Signed-off-by: Jiri Pirko 
---
 include/uapi/linux/if_link.h |  1 +
 net/core/rtnetlink.c | 26 +-
 2 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ff95760..fe6c4c5 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -145,6 +145,7 @@ enum {
IFLA_CARRIER,
IFLA_PHYS_PORT_ID,
IFLA_CARRIER_CHANGES,
+   IFLA_PHYS_SWITCH_ID,
__IFLA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 1087c6d..ef1450f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -43,6 +43,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -868,7 +869,8 @@ static noinline size_t if_nlmsg_size(const struct 
net_device *dev,
   + rtnl_port_size(dev, ext_filter_mask) /* IFLA_VF_PORTS + 
IFLA_PORT_SELF */
   + rtnl_link_get_size(dev) /* IFLA_LINKINFO */
   + rtnl_link_get_af_size(dev) /* IFLA_AF_SPEC */
-  + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_PORT_ID */
+  + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */
+  + nla_total_size(MAX_PHYS_ITEM_ID_LEN); /* IFLA_PHYS_SWITCH_ID */
 }
 
 static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
@@ -967,6 +969,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, 
struct net_device *dev)
return 0;
 }
 
+static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device 
*dev)
+{
+   int err;
+   struct netdev_phys_item_id psid;
+
+   err = swdev_get_id(dev, &psid);
+   if (err) {
+   if (err == -EOPNOTSUPP)
+   return 0;
+   return err;
+   }
+
+   if (nla_put(skb, IFLA_PHYS_SWITCH_ID, psid.id_len, psid.id))
+   return -EMSGSIZE;
+
+   return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
int type, u32 pid, u32 seq, u32 change,
unsigned int flags, u32 ext_filter_mask)
@@ -1039,6 +1059,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct 
net_device *dev,
if (rtnl_phys_port_id_fill(skb, dev))
goto nla_put_failure;
 
+   if (rtnl_phys_switch_id_fill(skb, dev))
+   goto nla_put_failure;
+
attr = nla_reserve(skb, IFLA_STATS,
sizeof(struct rtnl_link_stats));
if (attr == NULL)
@@ -1198,6 +1221,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NUM_RX_QUEUES]= { .type = NLA_U32 },
[IFLA_PHYS_PORT_ID] = { .type = NLA_BINARY, .len = 
MAX_PHYS_ITEM_ID_LEN },
[IFLA_CARRIER_CHANGES]  = { .type = NLA_U32 },  /* ignored */
+   [IFLA_PHYS_SWITCH_ID]   = { .type = NLA_BINARY, .len = 
MAX_PHYS_ITEM_ID_LEN },
 };
 
 static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
-- 
1.9.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [patch net-next 09/13] openvswitch: introduce vport_op get_netdev

2014-09-03 Thread Jiri Pirko
This makes it easy to query whether the vport has a netdev. It also allows
netdev_vport_priv and struct netdev_vport to be unexposed.

Signed-off-by: Jiri Pirko 
---
 net/openvswitch/datapath.c   |  2 +-
 net/openvswitch/dp_notify.c  |  7 ++---
 net/openvswitch/vport-internal_dev.c | 56 
 net/openvswitch/vport-netdev.c   | 16 +++
 net/openvswitch/vport-netdev.h   | 12 
 net/openvswitch/vport.h  |  2 ++
 6 files changed, 59 insertions(+), 36 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 683d6cd..75bb07f 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -171,7 +171,7 @@ static int get_dpifindex(struct datapath *dp)
 
local = ovs_vport_rcu(dp, OVSP_LOCAL);
if (local)
-   ifindex = netdev_vport_priv(local)->dev->ifindex;
+   ifindex = local->ops->get_netdev(local)->ifindex;
else
ifindex = 0;
 
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
index 2c631fe..d2cc24b 100644
--- a/net/openvswitch/dp_notify.c
+++ b/net/openvswitch/dp_notify.c
@@ -58,13 +58,12 @@ void ovs_dp_notify_wq(struct work_struct *work)
struct hlist_node *n;
 
hlist_for_each_entry_safe(vport, n, &dp->ports[i], 
dp_hash_node) {
-   struct netdev_vport *netdev_vport;
+   struct net_device *dev;
 
if (vport->ops->type != OVS_VPORT_TYPE_NETDEV)
continue;
-
-   netdev_vport = netdev_vport_priv(vport);
-   if (!(netdev_vport->dev->priv_flags & 
IFF_OVS_DATAPATH))
+   dev = vport->ops->get_netdev(vport);
+   if (!(dev->priv_flags & IFF_OVS_DATAPATH))
dp_detach_port_notify(vport);
}
}
diff --git a/net/openvswitch/vport-internal_dev.c 
b/net/openvswitch/vport-internal_dev.c
index 8451612..6be7928 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -32,6 +32,17 @@
 #include "vport-internal_dev.h"
 #include "vport-netdev.h"
 
+struct internal_dev_vport {
+   struct rcu_head rcu;
+   struct net_device *dev;
+};
+
+static struct internal_dev_vport *
+internal_dev_vport_priv(const struct vport *vport)
+{
+   return vport_priv(vport);
+}
+
 struct internal_dev {
struct vport *vport;
 };
@@ -154,49 +165,50 @@ static void do_setup(struct net_device *netdev)
 static struct vport *internal_dev_create(const struct vport_parms *parms)
 {
struct vport *vport;
-   struct netdev_vport *netdev_vport;
+   struct internal_dev_vport *int_vport;
struct internal_dev *internal_dev;
+   struct net_device *dev;
int err;
 
-   vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+   vport = ovs_vport_alloc(sizeof(struct internal_dev_vport),
&ovs_internal_vport_ops, parms);
if (IS_ERR(vport)) {
err = PTR_ERR(vport);
goto error;
}
 
-   netdev_vport = netdev_vport_priv(vport);
+   int_vport = internal_dev_vport_priv(vport);
 
-   netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
-parms->name, NET_NAME_UNKNOWN,
-do_setup);
-   if (!netdev_vport->dev) {
+   dev = alloc_netdev(sizeof(struct internal_dev), parms->name,
+  NET_NAME_UNKNOWN, do_setup);
+   if (!dev) {
err = -ENOMEM;
goto error_free_vport;
}
+   int_vport->dev = dev;
 
-   dev_net_set(netdev_vport->dev, ovs_dp_get_net(vport->dp));
-   internal_dev = internal_dev_priv(netdev_vport->dev);
+   dev_net_set(dev, ovs_dp_get_net(vport->dp));
+   internal_dev = internal_dev_priv(dev);
internal_dev->vport = vport;
 
/* Restrict bridge port to current netns. */
if (vport->port_no == OVSP_LOCAL)
-   netdev_vport->dev->features |= NETIF_F_NETNS_LOCAL;
+   dev->features |= NETIF_F_NETNS_LOCAL;
 
rtnl_lock();
-   err = register_netdevice(netdev_vport->dev);
+   err = register_netdevice(dev);
if (err)
goto error_free_netdev;
 
-   dev_set_promiscuity(netdev_vport->dev, 1);
+   dev_set_promiscuity(dev, 1);
rtnl_unlock();
-   netif_start_queue(netdev_vport->dev);
+   netif_start_queue(dev);
 
return vport;
 
 error_free_netdev:
rtnl_unlock();
-   free_netdev(netdev_vport->dev);
+   free_netdev(dev);
 error_free_vport:
ovs_vport_free(vport);
 error:
@@ -205,21 +217,21 @@ error:
 
 static void internal_dev_destroy(struct v

[ovs-dev] [patch net-next 11/13] sw_flow: add misc section to key with in_port_ifindex field

2014-09-03 Thread Jiri Pirko
Signed-off-by: Jiri Pirko 
---
 include/net/sw_flow.h|  3 +++
 net/openvswitch/hw_offload.c | 22 ++
 net/switchdev/switchdev.c|  2 ++
 3 files changed, 27 insertions(+)

diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
index 3af7758..a144d8e 100644
--- a/include/net/sw_flow.h
+++ b/include/net/sw_flow.h
@@ -69,6 +69,9 @@ struct sw_flow_key {
} nd;
} ipv6;
};
+   struct {
+   u32 in_port_ifindex; /* Input switch port ifindex (or 0). */
+   } misc;
 } __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
 
 struct sw_flow_key_range {
diff --git a/net/openvswitch/hw_offload.c b/net/openvswitch/hw_offload.c
index 45a0c5f..5c3edd0 100644
--- a/net/openvswitch/hw_offload.c
+++ b/net/openvswitch/hw_offload.c
@@ -83,6 +83,24 @@ errout:
return err;
 }
 
+void ovs_hw_flow_adjust(struct datapath *dp, struct ovs_flow *flow)
+{
+   struct vport *vport;
+
+   flow->flow.key.misc.in_port_ifindex = 0;
+   flow->flow.mask->key.misc.in_port_ifindex = 0;
+   vport = ovs_vport_ovsl(dp, flow->flow.key.phy.in_port);
+   if (vport && vport->ops->type == OVS_VPORT_TYPE_NETDEV) {
+   struct net_device *dev;
+
+   dev = vport->ops->get_netdev(vport);
+   if (dev) {
+   flow->flow.key.misc.in_port_ifindex = dev->ifindex;
+   flow->flow.mask->key.misc.in_port_ifindex = 0x;
+   }
+   }
+}
+
 int ovs_hw_flow_insert(struct datapath *dp, struct ovs_flow *flow)
 {
struct sw_flow_actions *actions;
@@ -93,6 +111,8 @@ int ovs_hw_flow_insert(struct datapath *dp, struct ovs_flow 
*flow)
ASSERT_OVSL();
BUG_ON(flow->flow.actions);
 
+   ovs_hw_flow_adjust(dp, flow);
+
err = sw_flow_action_create(dp, &actions, flow->sf_acts);
if (err)
return err;
@@ -124,6 +144,8 @@ int ovs_hw_flow_remove(struct datapath *dp, struct ovs_flow 
*flow)
 
ASSERT_OVSL();
 
+   ovs_hw_flow_adjust(dp, flow);
+
if (!flow->flow.actions) {
err = sw_flow_action_create(dp, &actions, flow->sf_acts);
if (err)
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index e079707..05acb0b 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -129,6 +129,8 @@ static void print_flow(const struct sw_flow *flow, struct 
net_device *dev,
print_flow_key_ip(PREFIX_MASK, &flow->mask->key);
print_flow_key_ipv4(PREFIX_NONE, &flow->key);
print_flow_key_ipv4(PREFIX_MASK, &flow->mask->key);
+   print_flow_key_misc(PREFIX_NONE, &flow->key);
+   print_flow_key_misc(PREFIX_MASK, &flow->mask->key);
print_flow_actions(flow->actions);
 }
 
-- 
1.9.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [patch net-next 07/13] dsa: implement ndo_swdev_get_id

2014-09-03 Thread Jiri Pirko
Signed-off-by: Jiri Pirko 
---
 include/linux/netdevice.h |  3 ++-
 include/net/dsa.h |  1 +
 net/dsa/Kconfig   |  2 +-
 net/dsa/dsa.c |  3 +++
 net/dsa/slave.c   | 10 ++
 5 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6a009d1..7ee070f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -41,7 +41,6 @@
 
 #include 
 #include 
-#include 
 #ifdef CONFIG_DCB
 #include 
 #endif
@@ -1259,6 +1258,8 @@ enum netdev_priv_flags {
 #define IFF_LIVE_ADDR_CHANGE   IFF_LIVE_ADDR_CHANGE
 #define IFF_MACVLANIFF_MACVLAN
 
+#include 
+
 /**
  * struct net_device - The DEVICE structure.
  * Actually, this whole structure is a big mistake.  It mixes I/O
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 9771292..d60cd42 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -140,6 +140,7 @@ struct dsa_switch {
u32 phys_mii_mask;
struct mii_bus  *slave_mii_bus;
struct net_device   *ports[DSA_MAX_PORTS];
+   struct netdev_phys_item_id psid;
 };
 
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index a585fd6..4e144a2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,6 +1,6 @@
 config HAVE_NET_DSA
def_bool y
-   depends on NETDEVICES && !S390
+   depends on NETDEVICES && NET_SWITCHDEV && !S390
 
 # Drivers must select NET_DSA and the appropriate tagging format
 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 61f145c..374912d 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -202,6 +202,9 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
ds->ports[i] = slave_dev;
}
 
+   ds->psid.id_len = MAX_PHYS_ITEM_ID_LEN;
+   get_random_bytes(ds->psid.id, ds->psid.id_len);
+
return ds;
 
 out_free:
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 7333a4a..d79a6c7 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -192,6 +192,15 @@ static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff 
*skb,
return NETDEV_TX_OK;
 }
 
+static int dsa_slave_swdev_get_id(struct net_device *dev,
+ struct netdev_phys_item_id *psid)
+{
+   struct dsa_slave_priv *p = netdev_priv(dev);
+   struct dsa_switch *ds = p->parent;
+
+   memcpy(psid, &ds->psid, sizeof(*psid));
+   return 0;
+}
 
 /* ethtool operations ***/
 static int
@@ -323,6 +332,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = {
.ndo_set_rx_mode= dsa_slave_set_rx_mode,
.ndo_set_mac_address= dsa_slave_set_mac_address,
.ndo_do_ioctl   = dsa_slave_ioctl,
+   .ndo_swdev_get_id   = dsa_slave_swdev_get_id,
 };
 
 static const struct dsa_device_ops notag_netdev_ops = {
-- 
1.9.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [patch net-next 08/13] net: introduce netdev_phys_item_ids_match helper

2014-09-03 Thread Jiri Pirko
Signed-off-by: Jiri Pirko 
Acked-by: Scott Feldman 
---
 include/linux/netdevice.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7ee070f..b2c3ff0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -750,6 +750,13 @@ struct netdev_phys_item_id {
unsigned char id_len;
 };
 
+static inline bool netdev_phys_item_ids_match(struct netdev_phys_item_id *id1,
+ struct netdev_phys_item_id *id2)
+{
+   return id1->id_len == id2->id_len &&
+  !memcmp(id1->id, id2->id, id1->id_len);
+}
+
 typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
   struct sk_buff *skb);
 
-- 
1.9.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [patch net-next 12/13] rocker: introduce rocker switch driver

2014-09-03 Thread Jiri Pirko
This patch introduces the first driver to benefit from the switchdev
infrastructure and to implement newly introduced switch ndos. This is a
driver for emulated switch chip implemented in qemu:
https://github.com/sfeldma/qemu-rocker/

This patch is a result of joint work with Scott Feldman.

Signed-off-by: Scott Feldman 
Signed-off-by: Jiri Pirko 
---
 MAINTAINERS  |6 +
 drivers/net/ethernet/Kconfig |1 +
 drivers/net/ethernet/Makefile|1 +
 drivers/net/ethernet/rocker/Kconfig  |   29 +
 drivers/net/ethernet/rocker/Makefile |5 +
 drivers/net/ethernet/rocker/rocker.c | 3553 ++
 drivers/net/ethernet/rocker/rocker.h |  465 +
 7 files changed, 4060 insertions(+)
 create mode 100644 drivers/net/ethernet/rocker/Kconfig
 create mode 100644 drivers/net/ethernet/rocker/Makefile
 create mode 100644 drivers/net/ethernet/rocker/rocker.c
 create mode 100644 drivers/net/ethernet/rocker/rocker.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 4baaf44..9797bda 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7638,6 +7638,12 @@ F:   drivers/hid/hid-roccat*
 F: include/linux/hid-roccat*
 F: Documentation/ABI/*/sysfs-driver-hid-roccat*
 
+ROCKER DRIVER
+M: Jiri Pirko 
+L: net...@vger.kernel.org
+S: Supported
+F: drivers/net/ethernet/rocker/
+
 ROCKETPORT DRIVER
 P: Comtrol Corp.
 W: http://www.comtrol.com
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index dc7406c..61c9cc4 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -153,6 +153,7 @@ source "drivers/net/ethernet/qlogic/Kconfig"
 source "drivers/net/ethernet/realtek/Kconfig"
 source "drivers/net/ethernet/renesas/Kconfig"
 source "drivers/net/ethernet/rdc/Kconfig"
+source "drivers/net/ethernet/rocker/Kconfig"
 
 config S6GMAC
tristate "S6105 GMAC ethernet support"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index 224a018..51ff723 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_NET_VENDOR_QLOGIC) += qlogic/
 obj-$(CONFIG_NET_VENDOR_REALTEK) += realtek/
 obj-$(CONFIG_SH_ETH) += renesas/
 obj-$(CONFIG_NET_VENDOR_RDC) += rdc/
+obj-$(CONFIG_NET_VENDOR_ROCKER) += rocker/
 obj-$(CONFIG_S6GMAC) += s6gmac.o
 obj-$(CONFIG_NET_VENDOR_SAMSUNG) += samsung/
 obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
diff --git a/drivers/net/ethernet/rocker/Kconfig 
b/drivers/net/ethernet/rocker/Kconfig
new file mode 100644
index 000..0441932
--- /dev/null
+++ b/drivers/net/ethernet/rocker/Kconfig
@@ -0,0 +1,29 @@
+#
+# Rocker device configuration
+#
+
+config NET_VENDOR_ROCKER
+   bool "Rocker devices"
+   default y
+   ---help---
+ If you have a network (Ethernet) card belonging to this class, say Y
+ and read the Ethernet-HOWTO, available from
+ .
+
+ Note that the answer to this question doesn't directly affect the
+ kernel: saying N will just cause the configurator to skip all
+ the questions about Rocker devices. If you say Y, you will be asked 
for
+ your specific card in the following questions.
+
+if NET_VENDOR_ROCKER
+
+config ROCKER
+   tristate "Rocker switch driver (EXPERIMENTAL)"
+   depends on PCI && NET_SWITCHDEV
+   ---help---
+ This driver supports Rocker switch device.
+
+ To compile this driver as a module, choose M here: the
+ module will be called rocker.
+
+endif # NET_VENDOR_ROCKER
diff --git a/drivers/net/ethernet/rocker/Makefile 
b/drivers/net/ethernet/rocker/Makefile
new file mode 100644
index 000..f85fb12
--- /dev/null
+++ b/drivers/net/ethernet/rocker/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the Rocker network device drivers.
+#
+
+obj-$(CONFIG_ROCKER) += rocker.o
diff --git a/drivers/net/ethernet/rocker/rocker.c 
b/drivers/net/ethernet/rocker/rocker.c
new file mode 100644
index 000..0e8b1ef
--- /dev/null
+++ b/drivers/net/ethernet/rocker/rocker.c
@@ -0,0 +1,3553 @@
+/*
+ * drivers/net/ethernet/rocker/rocker.c - Rocker switch device driver
+ * Copyright (c) 2014 Jiri Pirko 
+ * Copyright (c) 2014 Scott Feldman 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "rocker.h"
+
+static const char rocker_driver_name[] = "rocker";
+
+static const struct pci_device_id rocker_pci_id_table[] = {
+   {PCI_VDEVICE(REDHAT, PCI_DEVICE_ID_REDHAT_ROCKER), 0},
+   {0, }
+};
+
+struct rocker_flow_t

[ovs-dev] [patch net-next 13/13] switchdev: introduce Netlink API

2014-09-03 Thread Jiri Pirko
This patch exposes switchdev API using generic Netlink.
Example userspace utility is here:
https://github.com/jpirko/switchdev

Signed-off-by: Jiri Pirko 
---
 MAINTAINERS   |   1 +
 include/uapi/linux/switchdev.h| 119 +
 net/switchdev/Kconfig |  11 +
 net/switchdev/Makefile|   1 +
 net/switchdev/switchdev_netlink.c | 493 ++
 5 files changed, 625 insertions(+)
 create mode 100644 include/uapi/linux/switchdev.h
 create mode 100644 net/switchdev/switchdev_netlink.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 9797bda..83c4f43 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8820,6 +8820,7 @@ L:net...@vger.kernel.org
 S: Supported
 F: net/switchdev/
 F: include/net/switchdev.h
+F: include/uapi/linux/switchdev.h
 
 SYNOPSYS ARC ARCHITECTURE
 M: Vineet Gupta 
diff --git a/include/uapi/linux/switchdev.h b/include/uapi/linux/switchdev.h
new file mode 100644
index 000..83692e2
--- /dev/null
+++ b/include/uapi/linux/switchdev.h
@@ -0,0 +1,119 @@
+/*
+ * include/uapi/linux/switchdev.h - Netlink interface to Switch device
+ * Copyright (c) 2014 Jiri Pirko 
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _UAPI_LINUX_SWITCHDEV_H_
+#define _UAPI_LINUX_SWITCHDEV_H_
+
+enum {
+   SWDEV_CMD_NOOP,
+   SWDEV_CMD_FLOW_INSERT,
+   SWDEV_CMD_FLOW_REMOVE,
+};
+
+enum {
+   SWDEV_ATTR_UNSPEC,
+   SWDEV_ATTR_IFINDEX, /* u32 */
+   SWDEV_ATTR_FLOW,/* nest */
+
+   __SWDEV_ATTR_MAX,
+   SWDEV_ATTR_MAX = (__SWDEV_ATTR_MAX - 1),
+};
+
+enum {
+   SWDEV_ATTR_FLOW_KEY_UNSPEC,
+   SWDEV_ATTR_FLOW_KEY_TUN_ID, /* be64 */
+   SWDEV_ATTR_FLOW_KEY_TUN_IPV4_SRC,   /* be32 */
+   SWDEV_ATTR_FLOW_KEY_TUN_IPV4_DST,   /* be32 */
+   SWDEV_ATTR_FLOW_KEY_TUN_FLAGS,  /* be16 */
+   SWDEV_ATTR_FLOW_KEY_TUN_IPV4_TOS,   /* u8 */
+   SWDEV_ATTR_FLOW_KEY_TUN_IPV4_TTL,   /* u8 */
+   SWDEV_ATTR_FLOW_KEY_PHY_PRIORITY,   /* u32 */
+   SWDEV_ATTR_FLOW_KEY_PHY_IN_PORT,/* u32 (ifindex) */
+   SWDEV_ATTR_FLOW_KEY_ETH_SRC,/* ETH_ALEN */
+   SWDEV_ATTR_FLOW_KEY_ETH_DST,/* ETH_ALEN */
+   SWDEV_ATTR_FLOW_KEY_ETH_TCI,/* be16 */
+   SWDEV_ATTR_FLOW_KEY_ETH_TYPE,   /* be16 */
+   SWDEV_ATTR_FLOW_KEY_IP_PROTO,   /* u8 */
+   SWDEV_ATTR_FLOW_KEY_IP_TOS, /* u8 */
+   SWDEV_ATTR_FLOW_KEY_IP_TTL, /* u8 */
+   SWDEV_ATTR_FLOW_KEY_IP_FRAG,/* u8 */
+   SWDEV_ATTR_FLOW_KEY_TP_SRC, /* be16 */
+   SWDEV_ATTR_FLOW_KEY_TP_DST, /* be16 */
+   SWDEV_ATTR_FLOW_KEY_TP_FLAGS,   /* be16 */
+   SWDEV_ATTR_FLOW_KEY_IPV4_ADDR_SRC,  /* be32 */
+   SWDEV_ATTR_FLOW_KEY_IPV4_ADDR_DST,  /* be32 */
+   SWDEV_ATTR_FLOW_KEY_IPV4_ARP_SHA,   /* ETH_ALEN */
+   SWDEV_ATTR_FLOW_KEY_IPV4_ARP_THA,   /* ETH_ALEN */
+   SWDEV_ATTR_FLOW_KEY_IPV6_ADDR_SRC,  /* struct in6_addr */
+   SWDEV_ATTR_FLOW_KEY_IPV6_ADDR_DST,  /* struct in6_addr */
+   SWDEV_ATTR_FLOW_KEY_IPV6_LABEL, /* be32 */
+   SWDEV_ATTR_FLOW_KEY_IPV6_ND_TARGET, /* struct in6_addr */
+   SWDEV_ATTR_FLOW_KEY_IPV6_ND_SLL,/* ETH_ALEN */
+   SWDEV_ATTR_FLOW_KEY_IPV6_ND_TLL,/* ETH_ALEN */
+
+   __SWDEV_ATTR_FLOW_KEY_MAX,
+   SWDEV_ATTR_FLOW_KEY_MAX = (__SWDEV_ATTR_FLOW_KEY_MAX - 1),
+};
+
+enum {
+   SWDEV_FLOW_ACTION_TYPE_OUTPUT,
+   SWDEV_FLOW_ACTION_TYPE_VLAN_PUSH,
+   SWDEV_FLOW_ACTION_TYPE_VLAN_POP,
+};
+
+enum {
+   SWDEV_ATTR_FLOW_ACTION_UNSPEC,
+   SWDEV_ATTR_FLOW_ACTION_TYPE,/* u32 */
+   SWDEV_ATTR_FLOW_ACTION_OUT_PORT,/* u32 (ifindex) */
+   SWDEV_ATTR_FLOW_ACTION_VLAN_PROTO,  /* be16 */
+   SWDEV_ATTR_FLOW_ACTION_VLAN_TCI,/* u16 */
+
+   __SWDEV_ATTR_FLOW_ACTION_MAX,
+   SWDEV_ATTR_FLOW_ACTION_MAX = (__SWDEV_ATTR_FLOW_ACTION_MAX - 1),
+};
+
+enum {
+   SWDEV_ATTR_FLOW_ITEM_UNSPEC,
+   SWDEV_ATTR_FLOW_ITEM_ACTION,/* nest */
+
+   __SWDEV_ATTR_FLOW_ITEM_MAX,
+   SWDEV_ATTR_FLOW_ITEM_MAX = (__SWDEV_ATTR_FLOW_ITEM_MAX - 1),
+};
+
+enum {
+   SWDEV_ATTR_FLOW_UNSPEC,
+   SWDEV_ATTR_FLOW_KEY,/* nest */
+   SWDEV_ATTR_FLOW_MASK,   /* nest */
+   SWDEV_ATTR_FLOW_LIST_ACTION,/* nest */
+
+   __SWDEV_ATTR_FLOW_MAX,
+   SWDEV_ATTR_FLOW_MAX = (__SWDEV_ATTR_FLOW_MAX - 1),
+};
+
+/* Nested layout of flow add/remove command message:
+ *
+ * [SWDEV_ATTR_IFINDEX]
+ * [SWDEV_ATTR_FLOW]
+ * [SWDEV_ATTR_FLOW_K

Re: [ovs-dev] [PATCH 4/5] OvsTypes.h : Added support for BE16

2014-09-03 Thread Eitan Eliahu
Acked-by: Eitan Eliahu 


-Original Message-
From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ankur Sharma
Sent: Tuesday, September 02, 2014 6:05 PM
To: dev@openvswitch.org
Subject: [ovs-dev] [PATCH 4/5] OvsTypes.h : Added support for BE16

---
 datapath-windows/ovsext/Types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datapath-windows/ovsext/Types.h b/datapath-windows/ovsext/Types.h 
index e48df7a..b2ef48c 100644
--- a/datapath-windows/ovsext/Types.h
+++ b/datapath-windows/ovsext/Types.h
@@ -31,6 +31,7 @@ typedef uint8 __u8;
 
 /* Defines the  userspace specific data types for file
  * included within kernel only. */
+typedef UINT16 BE16;
 typedef UINT32 BE32;
 typedef UINT64 BE64;
 
--
1.9.1

___
dev mailing list
dev@openvswitch.org
https://urldefense.proofpoint.com/v1/url?u=http://openvswitch.org/mailman/listinfo/dev&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=yTvML8OxA42Jb6ViHe7fUXbvPVOYDPVq87w43doxtlY%3D%0A&m=qxCPHjsEm0sk7Ed6%2BvnOX6veLUjzzP7sgt5yP7GcfS8%3D%0A&s=4c04a83a62f10d0b7821a1fe5c301f2bb90e69d8943baf6f6ad8929819a45e03
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/5] NetlinkBuf.c: Netlink buffer mgmt apis.

2014-09-03 Thread Eitan Eliahu
Acked-by: Eitan Eliahu 


-Original Message-
From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ankur Sharma
Sent: Tuesday, September 02, 2014 6:05 PM
To: dev@openvswitch.org
Subject: [ovs-dev] [PATCH 2/5] NetlinkBuf.c: Netlink buffer mgmt apis.

In this change we have introduced buffer mgmt apis which will be used while 
creating netlink messages. The basic functionality provided by apis is on 
similar lines to ofpbuf in userspace with an exception that it will not do run 
time buffer reallocation.

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: 
https://urldefense.proofpoint.com/v1/url?u=https://github.com/openvswitch/ovs-issues/issues/37&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=yTvML8OxA42Jb6ViHe7fUXbvPVOYDPVq87w43doxtlY%3D%0A&m=OuQRXzkYbsP6LYVFbyIj3j2mCyokbU3E%2ByrsD%2BWE6vI%3D%0A&s=3e11857707219d6ca00afa1f475ad4ddea27c2e80b192aeb345a0dfd061eb8bf
---
 datapath-windows/automake.mk |   2 +
 datapath-windows/ovsext/Netlink/NetlinkBuf.c | 349 +++ 
 datapath-windows/ovsext/Netlink/NetlinkBuf.h |  41 
 datapath-windows/ovsext/ovsext.vcxproj   |   2 +
 4 files changed, 394 insertions(+)
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.c
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
5ea0197..297a809 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -32,6 +32,8 @@ EXTRA_DIST += \
datapath-windows/ovsext/Jhash.h \
datapath-windows/ovsext/Netlink/Netlink.c \
datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.c \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.h \
datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
diff --git a/datapath-windows/ovsext/Netlink/NetlinkBuf.c 
b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
new file mode 100644
index 000..97436ea
--- /dev/null
+++ b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * 
https://urldefense.proofpoint.com/v1/url?u=http://www.apache.org/licenses/LICENSE-2.0&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=yTvML8OxA42Jb6ViHe7fUXbvPVOYDPVq87w43doxtlY%3D%0A&m=OuQRXzkYbsP6LYVFbyIj3j2mCyokbU3E%2ByrsD%2BWE6vI%3D%0A&s=fb0ca26755367465f21b4c127b085d7c6cd716e9a768d479437d6a52c892bdb0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+===
+===
+ * This is a simple buffer mangement framework specific for netlink protocol.
+ * The name could be confused with ovsext/BufferMgmt.c. 
+Ovsext/BufferMgmt.c
+ * deals with buffer mgmt for NBLs. Where as this framework deals with
+ * management of buffer that holds a netlink message.
+ *
+ * This framework provides APIs for putting/accessing data in a buffer. 
+These
+ * APIs are used by driver's netlink protocol implementation.
+ *
+ * We can see this framework as a subset of ofpbuf in ovs userspace.
+ *
+ * This framework is NOT a generic buffer management framework (ofpbuf
+ * is a generic buffer mgmt framework) and provides only the 
+functioanlities
+ * which would be useful for netlink protocol. Some of the key features are:
+ *
+ * a. It DOES NOT support automatic buffer reallocation.
+ *i. A netlink input/output message is a static buffer.
+ * b. The unused space is at the tail.
+ * c. There is no notion of headdroom.
+ * 
+===
+===
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_NETLINK
+#include "Debug.h"
+#include "NetlinkBuf.h"
+
+/* Returns used space in the buffer */
+#define NL_BUF_USED_SPACE(_buf)   (_buf->bufLen -\
+   _buf->bufRemLen)
+
+/* Validates that offset is within buffer boundaries and will not
+ * create holes in the buffer.*/
+#define NL_BUF_IS_VALID_OFFSET(_buf, _offset) (_offset <=\
+   
+NL_BUF_TAIL_OFFSET(_buf) ? 1 : 0)
+
+/* Validates if new data of size _size can be added at offset _offset.
+ * This macor assumes that offset validation has been done.*/
+#define NL_BUF_CAN_ADD(_buf, _size, _offset)  (((_offset

Re: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Eitan Eliahu
Acked-by: Eitan Eliahu 


-Original Message-
From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ankur Sharma
Sent: Tuesday, September 02, 2014 6:04 PM
To: dev@openvswitch.org
Subject: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to 
a new directory.

In this change we have created a new directory named Netlink
inside datapath-windows/ovsext/. This directory will be used to
keep all the netlink related files.

The reason we have created new directory is that for 'put' related
APIs we will be adding netlink buffer mgmt files as well. These files
will take the count of netlink related files to 5. Hence
we decided to club the netlink files in a single directory.
---
 datapath-windows/automake.mk   |   6 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 -
 datapath-windows/ovsext/Netlink.h  | 104 --
 datapath-windows/ovsext/Netlink/Netlink.c  | 469 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 104 ++
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 116 ++
 datapath-windows/ovsext/NetlinkProto.h | 116 --
 datapath-windows/ovsext/ovsext.vcxproj |   8 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 10 files changed, 699 insertions(+), 699 deletions(-)
 delete mode 100644 datapath-windows/ovsext/Netlink.c
 delete mode 100644 datapath-windows/ovsext/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkProto.h
 delete mode 100644 datapath-windows/ovsext/NetlinkProto.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1
 
-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"
 
 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink.c
deleted file mode 100644
index 90a633b..000
--- a/datapath-windows/ovsext/Netlink.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * 
https://urldefense.proofpoint.com/v1/url?u=http://www.apache.org/licenses/LICENSE-2.0&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=yTvML8OxA42Jb6ViHe7fUXbvPVOYDPVq87w43doxtlY%3D%0A&m=RbdeFJ06%2BD%2FS%2FpsG%2FCSqalFPs6YgWLudn3%2BZhiC74gs%3D%0A&s=3e504b928c3baf58eced66bcb98cdf424a7f040646e697a423022758821745e4
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "precomp.h"
-#include "NetlinkProto.h"
-#include "Netlink.h"
-
-#ifdef OVS_DBG_MOD
-#undef OVS_DBG_MOD
-#endif
-#define OVS_DBG_MOD OVS_DBG_NETLINK
-#include "Debug.h"
-
-/*
- * ---
- * Netlink message accessing the payload.
- * ---
- */
-PVOID
-NlMsgAt(const PNL_MSG_HDR nlh, UINT32 offset)
-{
-return ((PCHAR)nlh + offset);
-}
-
-/*
- * ---
- * Returns the size of netlink message.
- * ---

Re: [ovs-dev] [PATCH 3/5] NetlinkProto.h: Minor fix for typos and new macro for padding.

2014-09-03 Thread Eitan Eliahu
Acked-by: Eitan Eliahu 


-Original Message-
From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ankur Sharma
Sent: Tuesday, September 02, 2014 6:05 PM
To: dev@openvswitch.org
Subject: [ovs-dev] [PATCH 3/5] NetlinkProto.h: Minor fix for typos and new 
macro for padding.

Added a new macro for calculating the number of bytes required for padding. 
Fixed a minor typo.
---
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datapath-windows/ovsext/Netlink/NetlinkProto.h 
b/datapath-windows/ovsext/Netlink/NetlinkProto.h
index 898cc84..2c438a6 100644
--- a/datapath-windows/ovsext/Netlink/NetlinkProto.h
+++ b/datapath-windows/ovsext/Netlink/NetlinkProto.h
@@ -39,6 +39,9 @@
 /* Returns X rounded up to the nearest multiple of Y. */  #define ROUND_UP(X, 
Y) (DIV_ROUND_UP(X, Y) * (Y))
 
+/* Returns the least number that, when added to X, yields a multiple of 
+Y. */ #define PAD_SIZE(X, Y) (ROUND_UP(X, Y) - (X))
+
 /* Netlink message */
 
 /* nlmsg_flags bits. */
@@ -92,7 +95,7 @@ typedef struct _GENL_MSG_HDR {
 UINT8 cmd;
 UINT8 version;
 UINT16 reserved;
-} GENL_MSG_HDR, *PGENL_MDG_HDR;
+} GENL_MSG_HDR, *PGENL_MSG_HDR;
 BUILD_ASSERT_DECL(sizeof(GENL_MSG_HDR) == 4);
 
 /* Netlink attributes */
--
1.9.1

___
dev mailing list
dev@openvswitch.org
https://urldefense.proofpoint.com/v1/url?u=http://openvswitch.org/mailman/listinfo/dev&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=yTvML8OxA42Jb6ViHe7fUXbvPVOYDPVq87w43doxtlY%3D%0A&m=5Hiz9tfrket71f8QhIXlDRdzAU27UGUs39HEbv5hMck%3D%0A&s=1a8c60ce7cea544db179d5e751603c8d8406b5ccaea2dfeaaab925abe8152adc
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 5/5] Netlink.c: Added netlink put APIs.

2014-09-03 Thread Eitan Eliahu
In a later change we would like to have these functions inline.

Acked-by: Eitan Eliahu 


-Original Message-
From: dev [mailto:dev-boun...@openvswitch.org] On Behalf Of Ankur Sharma
Sent: Tuesday, September 02, 2014 6:05 PM
To: dev@openvswitch.org
Subject: [ovs-dev] [PATCH 5/5] Netlink.c: Added netlink put APIs.

In this change we have added the APIs for putting netlink headers, attributes 
in a buffer.

The buffer is managed through NetlinkBuf.[c|h].
---
 datapath-windows/ovsext/Netlink/Netlink.c | 403 +- 
 datapath-windows/ovsext/Netlink/Netlink.h |  28 +++
 2 files changed, 427 insertions(+), 4 deletions(-)

diff --git a/datapath-windows/ovsext/Netlink/Netlink.c 
b/datapath-windows/ovsext/Netlink/Netlink.c
index 90a633b..8c6b139 100644
--- a/datapath-windows/ovsext/Netlink/Netlink.c
+++ b/datapath-windows/ovsext/Netlink/Netlink.c
@@ -24,6 +24,402 @@
 #define OVS_DBG_MOD OVS_DBG_NETLINK
 #include "Debug.h"
 
+/* 
+===
+===
+ * This file provides simple netlink get, put and validation APIs.
+ * Most of the code is on similar lines as userspace netlink implementation.
+ *
+ * TODO: Convert these methods to inline.
+ * 
+===
+===
+ */
+
+/*
+ * 
+---
+
+ * Adds Netlink Header to the NL_BUF.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutNlHdr(PNL_BUF buf, PNL_MSG_HDR nlMsg) {
+if ((NlBufCopyAtOffset(buf, (PCHAR)nlMsg, NLMSG_HDRLEN, 0))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * Adds Genl Header to the NL_BUF.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutGenlHdr(PNL_BUF buf, PGENL_MSG_HDR genlMsg) {
+if ((NlBufCopyAtOffset(buf, (PCHAR)genlMsg, GENL_HDRLEN, NLMSG_HDRLEN))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * Adds OVS Header to the NL_BUF.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutOvsHdr(PNL_BUF buf, POVS_HDR ovsHdr) {
+if ((NlBufCopyAtOffset(buf, (PCHAR)ovsHdr, OVS_HDRLEN,
+   GENL_HDRLEN + NLMSG_HDRLEN))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * Adds data of length 'len' to the tail end of NL_BUF.
+ * Refer nl_msg_put for more details.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutTail(PNL_BUF buf, const PCHAR data, UINT32 len) {
+len = NLMSG_ALIGN(len);
+if (NlBufCopyAtTail(buf, data, len)) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * memsets length 'len' at tail end of NL_BUF.
+ * Refer nl_msg_put_uninit for more details.
+ * 
+---
+
+ */
+PCHAR
+NlMsgPutTailUninit(PNL_BUF buf, UINT32 len) {
+len = NLMSG_ALIGN(len);
+return NlBufCopyAtTailUninit(buf, len); }
+
+/*
+ * 
+---
+
+ * Adds an attribute to the tail end of buffer. It does
+ * not copy the attribute payload.
+ * Refer nl_msg_put_unspec_uninit for more details.
+ * 
+---
+
+ */
+PCHAR
+NlMsgPutTailUnspecUninit(PNL_BUF buf, UINT16 type, UINT16 len) {
+PCHAR ret = NULL;
+UINT16 totalLen = NLA_HDRLEN + len;
+PNL_ATTR nla = (PNL_ATTR)(NlMsgPutTailUninit(buf, totalLen));
+
+if (!nla) {
+goto done;
+}
+
+ret = (PCHAR)(nla + 1);
+nla->nlaLen = totalLen;
+nla->nlaType = type;
+
+done:
+return ret;
+}
+
+/*
+ * 
+---
+
+ * Adds an attribute to the tail end of buffer. It copies attribute
+ * payload as well.
+ * Refer nl_msg_put_unspec for more details.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutTailUnspec(PNL_BUF buf, UINT16 type, PCHAR data, UINT16 len) {
+BOOLEAN ret = TRUE;
+PCHAR nlaData = NlMsgPutTailUnspecUninit(buf, type, len);
+
+if (!nlaData) {
+ret = FALSE;
+goto done;
+}
+
+RtlCopyMemory(nlaData, data, len);
+
+done:
+return ret;
+}
+
+/*
+ * 
+---
+
+ * Adds an attribute of 'type' and no payload at the tail end of buffer.
+ * Refer nl_msg_put_flag for more details.
+ * 
+-

Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread John Fastabend

On 09/03/2014 02:24 AM, Jiri Pirko wrote:

After this, flow related structures can be used in other code.

Signed-off-by: Jiri Pirko 
---


Hi Jiri,

As I indicated before, I'm looking into integrating this with some
hardware here. Progress is a bit slow, but I'm starting to look at it. The
i40e/ixgbe driver is one open source example, with very limited
support for tables, flow matches, etc. And then there is a closed source driver
with much more flexibility. What I don't have is a middle-of-the-road
switch to work with: something better than a host NIC but not as
flexible as a TOR.

A couple of questions: my assumption here is that I can extend the flow_key
as needed to support additional match criteria my hardware has.
I scanned the ./net/openvswitch source and I didn't catch any
place that would break but might need to take a closer look.
Similarly the actions set will need to be extended. For example
if I want to use this with i40e a OVS_ACTION_ATTR_QUEUE could
be used to steer packets to the queue. With this in mind we
will want a follow up patch to rename OVS_ACTION_ATTR_* to
FLOW_ACTION_ATTR_*

Also I have some filters that can match on offset/length/mask
tuples. As far as I can tell this is going to have to be yet
another interface? Or would it be worth the effort to define
the flow key more generically?
write a separate interface. I think this is what Jamal referred
to as another "classifier".

Thanks,
John

[...]


+
+struct sw_flow_key_ipv4_tunnel {
+   __be64 tun_id;
+   __be32 ipv4_src;
+   __be32 ipv4_dst;
+   __be16 tun_flags;
+   u8   ipv4_tos;
+   u8   ipv4_ttl;
+};
+
+struct sw_flow_key {
+   struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. */
+   struct {
+   u32 priority;   /* Packet QoS priority. */
+   u32 skb_mark;   /* SKB mark. */
+   u16 in_port;/* Input switch port (or DP_MAX_PORTS). 
*/
+   } __packed phy; /* Safe when right after 'tun_key'. */
+   struct {
+   u8 src[ETH_ALEN];   /* Ethernet source address. */
+   u8 dst[ETH_ALEN];   /* Ethernet destination address. */
+   __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set 
otherwise. */
+   __be16 type;/* Ethernet frame type. */
+   } eth;
+   struct {
+   u8 proto;   /* IP protocol or lower 8 bits of ARP 
opcode. */
+   u8 tos; /* IP ToS. */
+   u8 ttl; /* IP TTL/hop limit. */
+   u8 frag;/* One of OVS_FRAG_TYPE_*. */
+   } ip;
+   struct {
+   __be16 src; /* TCP/UDP/SCTP source port. */
+   __be16 dst; /* TCP/UDP/SCTP destination port. */
+   __be16 flags;   /* TCP flags. */
+   } tp;
+   union {
+   struct {
+   struct {
+   __be32 src; /* IP source address. */
+   __be32 dst; /* IP destination address. */
+   } addr;
+   struct {
+   u8 sha[ETH_ALEN];   /* ARP source hardware 
address. */
+   u8 tha[ETH_ALEN];   /* ARP target hardware 
address. */
+   } arp;
+   } ipv4;
+   struct {
+   struct {
+   struct in6_addr src;/* IPv6 source address. 
*/
+   struct in6_addr dst;/* IPv6 destination 
address. */
+   } addr;
+   __be32 label;   /* IPv6 flow label. */
+   struct {
+   struct in6_addr target; /* ND target address. */
+   u8 sll[ETH_ALEN];   /* ND source link layer 
address. */
+   u8 tll[ETH_ALEN];   /* ND target link layer 
address. */
+   } nd;
+   } ipv6;
+   };
+} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. */
+
+struct sw_flow_key_range {
+   unsigned short int start;
+   unsigned short int end;
+};
+
+struct sw_flow_mask {
+   struct sw_flow_key_range range;
+   struct sw_flow_key key;
+};
+
+struct sw_flow_action {
+};
+
+struct sw_flow_actions {
+   unsigned count;
+   struct sw_flow_action actions[0];
+};
+
+struct sw_flow {
+   struct sw_flow_key key;
+   struct sw_flow_key unmasked_key;
+   struct sw_flow_mask *mask;
+   struct sw_flow_actions *actions;
+};
+



--
John Fastabend Intel Corporation
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] vtep: additions to BFD configuration and status reporting

2014-09-03 Thread Gurucharan Shetty
On Tue, Sep 2, 2014 at 1:58 PM,   wrote:
> From: Ariel Tubaltsev 
>
> This commit adds default values for some BFD configuration keys
> (bfd_config_local:bfd_dst_mac and bfd_params:enable). It also adds new
> BFD status keys (bfd_enabled and bfd_info).
>
> Signed-off-by: Ariel Tubaltsev 
> Signed-off-by: Bruce Davie 
Applied, thanks.

> ---
>  vtep/vtep.xml |   16 
>  1 file changed, 16 insertions(+)
>
> diff --git a/vtep/vtep.xml b/vtep/vtep.xml
> index 4393a16..cf6bf74 100644
> --- a/vtep/vtep.xml
> +++ b/vtep/vtep.xml
> @@ -360,6 +360,7 @@
>Set to an Ethernet address in the form
>
> xx:xx:xx:xx:xx:xx
>to set the MAC expected as destination for received BFD packets.
> +  The default is 00:23:20:00:00:01.
>  
>
>  
> @@ -398,6 +399,7 @@
>
>  
>True to enable BFD on this tunnel.
> +  The default is False.
>  
>
>   @@ -459,6 +461,15 @@
>  HSC clears all key-value pairs from .
>
>
> +   +type='{"type": "boolean"}'>
> +Set to true if the BFD session has been successfully
> +enabled. Set to false if the VTEP cannot support BFD or has
> +insufficient resources to enable BFD on this tunnel. The NVC
> +will disable the BFD monitoring on the other side of the tunnel
> +once this value is set to false.
> +  
> +
>  type='{"type": "string",
>"enum": ["set", ["admin_down", "down", "init", "up"]]}'>
> @@ -490,6 +501,11 @@
>  remote endpoint's BFD session thinks is wrong.  The error messages
>  are defined in section 4.1 of [RFC 5880].
>
> +
> +  
> +A short message providing further information about the BFD status
> +(possibly including reasons why BFD could not be enabled).
> +  
>
>  
>
> --
> 1.7.9.5
>
> ___
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v6 00/18] Rapid Spanning Tree Protocol

2014-09-03 Thread Daniele Venturino
I looked at and applied the patches. They look good to me; I just have some
notes on patches 13/18 and 16/18.


@@ -108,9 +121,9 @@  process_received_bpdu(struct rstp_port *p, const void
> *bpdu, size_t bpdu_size)
>  memcpy(&p->received_bpdu_buffer, bpdu, sizeof(struct rstp_bpdu));
>  rstp->changes = true;
> -move_rstp(rstp);
> +move_rstp__(rstp);
>  } else {
> -VLOG_DBG("%s, port %u: Bad BPDU received", p->rstp->name,
> +VLOG_DBG("%s, port %u: Bad RSTP BPDU received", p->rstp->name,
>   p->port_number);
>  p->error_count++;
>  }


The received BPDU could also be an STP BPDU.

/* Each RSTP port poits back to struct rstp without holding a
> + * reference for that pointer.  This is OK as we never move
> + * ports from one bridge to another, and holders always
> + * release their ports before releasing the bridge.  This
> + * means that there should be not ports at this time. */
> +ovs_assert(rstp->ports_count == 0);


Each RSTP port points back

 +rstp_set_bridge_priority__(rstp, RSTP_DEFAULT_PRIORITY);
> +rstp_set_bridge_ageing_time__(rstp, RSTP_DEFAULT_AGEING_TIME);
> +rstp_set_bridge_forward_delay__(rstp,
> RSTP_DEFAULT_BRIDGE_FORWARD_DELAY);
> +rstp_set_bridge_hello_time__(rstp);
> +rstp_set_bridge_max_age__(rstp, RSTP_DEFAULT_BRIDGE_MAX_AGE);
> +rstp_set_bridge_migrate_time__(rstp);
> +rstp_set_bridge_transmit_hold_count__(rstp,
> +
>  RSTP_DEFAULT_TRANSMIT_HOLD_COUNT);
> +rstp_set_bridge_times__(rstp, RSTP_DEFAULT_BRIDGE_FORWARD_DELAY,
> +RSTP_BRIDGE_HELLO_TIME,
> +RSTP_DEFAULT_BRIDGE_MAX_AGE, 0);
>
>
These setters are the same in rstp_create() and reinitialize_rstp__(). We
could define a function like rstp_initialize_port_defaults__() for the
bridge.


> +static void
> +rstp_port_set_mcheck__(struct rstp_port *port, bool mcheck)
> +OVS_REQUIRES(rstp_mutex)
>  {
> -struct rstp *rstp;
> +/* XXX: Should we also support setting this to false, i.e., when port
> + * configuration is changed? */
> +if (mcheck == true && port->rstp->force_protocol_version >= 2) {
> +port->mcheck = true;


The 802.1D-2004 standard specifies that mcheck is set by management and
cleared by its procedure.

*17.19.13 mcheck*
*A boolean. May be set by management to force the Port Protocol Migration
state machine to transmit RST*
*BPDUs for a MigrateTime (17.13.9) period, to test whether all STP Bridges
(17.4) on the attached LAN*
*have been removed and the Port can continue to transmit RSTP BPDUs.
Setting mcheck has no effect if*
*stpVersion (17.20.12) is TRUE, i.e., the Bridge is operating in “STP
Compatibility” mode.*

However, to use it twice, I need to reset it in the database (to make it
change when I want to invoke its setter), so I use the command with 0, with
the sole purpose of clearing it in the db; no action is needed from rstp.
Then I can set it again and trigger the procedure.


static void
>  xlate_xport_set(struct xport *xport, odp_port_t odp_port,
>  const struct netdev *netdev, const struct cfm *cfm,
> -const struct bfd *bfd, int stp_port_no, int rstp_port_no,
> +const struct bfd *bfd, int stp_port_no,
> +const struct rstp_port* rstp_port,
>  enum ofputil_port_config config, enum ofputil_port_state
> state,
>  bool is_tunnel, bool may_enable)
>  {
>  xport->config = config;
>  xport->state = state;
>  xport->stp_port_no = stp_port_no;
> -xport->rstp_port_no = rstp_port_no;


I get a segfault when removing a port from a bridge. I don't get it if I add
this line here:
xport->rstp_port = rstp_port;


 xport->is_tunnel = is_tunnel;
>  xport->may_enable = may_enable;
>  xport->odp_port = odp_port;
> +if (xport->rstp_port != rstp_port) {
> +rstp_port_unref(xport->rstp_port);
> +xport->rstp_port = rstp_port_ref(rstp_port);
> +}
> @@ -3133,16 +3088,15 @@  port_run(struct ofport_dpif *ofport)
>  if (ofport->may_enable != enable) {
>  struct ofproto_dpif *ofproto =
> ofproto_dpif_cast(ofport->up.ofproto);
> -ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
> -}
> -ofport->may_enable = enable;
> +ofproto->backer->need_revalidate = REV_PORT_TOGGLED;
> -if (ofport->rstp_port) {
> -if (rstp_port_get_mac_operational(ofport->rstp_port) != enable) {
> +if (ofport->rstp_port) {
>  rstp_port_set_mac_operational(ofport->rstp_port, enable);
>  }
>  }
> +
> +ofport->may_enable = enable;
>  }


rstp_port_set_mac_operational(ofport->rstp_port, enable) should be outside
the if (ofport->may_enable != enable) block; otherwise ports remain disabled
when added.


In patch 16/18:

diff --git a/lib/rstp-state-machines.c b/lib/rstp-state-machines.c
> index e8b8438..5ae7124 100644
> --- a/lib/rstp-state-machin

Re: [ovs-dev] [patch net-next 03/13] net: introduce generic switch devices support

2014-09-03 Thread John Fastabend

On 09/03/2014 02:24 AM, Jiri Pirko wrote:

The goal of this is to provide a possibility to suport various switch
chips. Drivers should implement relevant ndos to do so. Now there is a
couple of ndos defines:
- for getting physical switch id is in place.
- for work with flows.

Note that user can use random port netdevice to access the switch.

Signed-off-by: Jiri Pirko 
---



[...]


  struct netpoll_info;
@@ -997,6 +999,24 @@ typedef u16 (*select_queue_fallback_t)(struct net_device 
*dev,
   *Callback to use for xmit over the accelerated station. This
   *is used in place of ndo_start_xmit on accelerated net
   *devices.
+ *
+ * int (*ndo_swdev_get_id)(struct net_device *dev,
+ *struct netdev_phys_item_id *psid);
+ * Called to get an ID of the switch chip this port is part of.
+ * If driver implements this, it indicates that it represents a port
+ * of a switch chip.
+ *
+ * int (*ndo_swdev_flow_insert)(struct net_device *dev,
+ * const struct sw_flow *flow);
+ * Called to insert a flow into switch device. If driver does
+ * not implement this, it is assumed that the hw does not have
+ * a capability to work with flows.
+ *
+ * int (*ndo_swdev_flow_remove)(struct net_device *dev,
+ * const struct sw_flow *flow);
+ * Called to remove a flow from switch device. If driver does
+ * not implement this, it is assumed that the hw does not have
+ * a capability to work with flows.
   */
  struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1146,6 +1166,14 @@ struct net_device_ops {
struct net_device *dev,
void *priv);
int (*ndo_get_lock_subclass)(struct net_device 
*dev);
+#ifdef CONFIG_NET_SWITCHDEV
+   int (*ndo_swdev_get_id)(struct net_device *dev,
+   struct netdev_phys_item_id 
*psid);
+   int (*ndo_swdev_flow_insert)(struct net_device *dev,
+const struct sw_flow 
*flow);
+   int (*ndo_swdev_flow_remove)(struct net_device *dev,
+const struct sw_flow 
*flow);


Not really a critique of your patch, but I'll need to extend this
with an ndo_swdev_flow_dump() to get the fields. Without this, if
your user space side ever restarts or gets out of sync, there is no
way to get back in sync.

Also with hardware that has multiple flow tables we need to indicate
the table to insert the flow into. One concrete reason to do this
is to create atomic updates of multiple ACLs. The idea is to create
a new ACL table, build the table up and then link it in. This can be
added when it's needed; my open source drivers don't support this yet
either, but maybe adding multiple tables to the rocker switch will help
flesh this out.

Finally we need some way to drive capabilities out of the swdev.
Even rocker switch needs this to indicate it doesn't support matching
on all the sw_flow fields. Without this it's not clear to me how to
manage the device from user space. I tried writing a user space daemon
for the simpler flow director interface and the try-and-see model
breaks quickly.


+#endif
  };

  /**
diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
index 21724f1..3af7758 100644
--- a/include/net/sw_flow.h
+++ b/include/net/sw_flow.h
@@ -81,7 +81,21 @@ struct sw_flow_mask {
struct sw_flow_key key;
  };

+enum sw_flow_action_type {
+   SW_FLOW_ACTION_TYPE_OUTPUT,
+   SW_FLOW_ACTION_TYPE_VLAN_PUSH,
+   SW_FLOW_ACTION_TYPE_VLAN_POP,
+};
+


OK my previous comment about having another patch to create
generic actions seems to be resolved here. I'm not sure how
important it is but if we abstract the flow types away from
OVS is there any reason not to reuse and relabel the action
types as well? I guess we can't break userspace API but maybe
a 1:1 mapping would be better?


  struct sw_flow_action {
+   enum sw_flow_action_type type;
+   union {
+   u32 out_port_ifindex;
+   struct {
+   __be16 vlan_proto;
+   u16 vlan_tci;
+   } vlan;
+   };
  };


[...]

I think my comments could be addressed with additional patches
if you want. I could help but it will be another week or so
before I have some time. The biggest issue IMO is the lack of
capabilities queries.

Thanks,
John


--
John Fastabend Intel Corporation
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Alin Serdean
Hi Ankur,

Just a small nit: if you use "git format-patch", could you also add the -M option?

Thank you,
Alin.

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Wednesday, September 3, 2014 4:04 AM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to 
a new directory.

In this change we have created a new directory named Netlink inside 
datapath-windows/ovsext/. This directory will be used to keep all the netlink 
related files.

The reason we have created new directory is that for 'put' related APIs we will 
be adding netlink buffer mgmt files as well. These files will take the count of 
netlink related files to 5. Hence we decided to club the netlink files in a 
single directory.
---
 datapath-windows/automake.mk   |   6 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 -
 datapath-windows/ovsext/Netlink.h  | 104 --
 datapath-windows/ovsext/Netlink/Netlink.c  | 469 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 104 ++
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 116 ++
 datapath-windows/ovsext/NetlinkProto.h | 116 --
 datapath-windows/ovsext/ovsext.vcxproj |   8 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 10 files changed, 699 insertions(+), 699 deletions(-)  delete mode 100644 
datapath-windows/ovsext/Netlink.c  delete mode 100644 
datapath-windows/ovsext/Netlink.h  create mode 100644 
datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkProto.h
 delete mode 100644 datapath-windows/ovsext/NetlinkProto.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1
 
-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"
 
 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink.c
deleted file mode 100644
index 90a633b..000
--- a/datapath-windows/ovsext/Netlink.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "precomp.h"
-#include "NetlinkProto.h"
-#include "Netlink.h"
-
-#ifdef OVS_DBG_MOD
-#undef OVS_DBG_MOD
-#endif
-#define OVS_DBG_MOD OVS_DBG_NETLINK
-#include "Debug.h"
-
-/*
- * ---
- * Netlink message accessing the payload.
- * ---
- */
-PVOID
-NlMsgAt(const PNL_MSG_HDR nlh, UINT32 offset) -{
-return ((PCHAR)nlh + offset);
-}
-
-/*
- * ---
- * Returns the size of netlink message.
- * ---
- */
-UINT32
-NlMsgSize(const PNL_MSG_HDR nlh)
-{
-return nlh->nlmsgLen;
-}
-
-/*
- * --

Re: [ovs-dev] [Patch] Documentation for DPDK IVSHMEM VM Communications

2014-09-03 Thread Polehn, Mike A
The setup for packet transfer between the switch and VM by shared memory 
(IVSHMEM) is moderately complex and most details are not easily found.  Also 
this is a different transfer method than user side vhost which copies between 
the separate memory spaces at a cost of slower packet rate or higher CPU core 
load(s). Shared memory transfer is a much more efficient transfer method since it 
is only copying packet pointers and not packet data. However, it lacks security 
since the VM can see all the packet buffer memory space at all times. However, 
efficiency vs security is something only the user of the system can determine 
since they know if the system is a closed environment or not.

I put in enough details to allow someone, who has not been intimately involved 
in doing IVSHMEM packet processing work to setup and get the shared memory 
transfer working in a short time given today's current build state. Including 
the system packages to allow a proper build (qemu in particular) may be 
overkill, but figuring out these required packages can be very time consuming. 
Unless it is working, you cannot experiment with or test the IVSHMEM shared 
memory operation or even move the method forward as an alternative setup, the 
correct information is just not readily available, weeks can easily be spent 
(only if you are very determined to get it to work).

The INSTALL.DPDK also needs to be updated for DPDK 1.7 ...

Would you like to have this put in as separate doc, INSTALL.DPDK.IVSHMEM?

Mike

-Original Message-
From: Pravin Shelar [mailto:pshe...@nicira.com] 
Sent: Friday, August 29, 2014 3:54 PM
To: Polehn, Mike A
Cc: d...@openvswitch.com
Subject: Re: [ovs-dev] [Patch] Documentation for DPDK IVSHMEM VM Communications

On Fri, Aug 15, 2014 at 7:07 AM, Polehn, Mike A  wrote:
> Adds documentation on how to run IVSHMEM communication through VM.
>
I think INSTALL.DPDK is getting rather large and hard to understand with all 
details.
so I dropped "Alternative method to get QEMU, download and build from OVDK" 
section.
We can add this documentation to separate file once vhost support is added.

Thanks.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Ankur Sharma
Hi Alin,

Sure. Sorry i missed that.
Do you want me to spin up a v2 with -M enabled in format-patch?

Thanks.

Regards,
Ankur

From: Alin Serdean 
Sent: Wednesday, September 3, 2014 9:29 AM
To: Ankur Sharma; dev@openvswitch.org
Subject: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Ankur,

Just a small nit I you use "git format-patch" could you also add the -M option?

Thank you,
Alin.

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Wednesday, September 3, 2014 4:04 AM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to 
a new directory.

In this change we have created a new directory named Netlink inside 
datapath-windows/ovsext/. This directory will be used to keep all the netlink 
related files.

The reason we have created new directory is that for 'put' related APIs we will 
be adding netlink buffer mgmt files as well. These files will take the count of 
netlink related files to 5. Hence we decided to club the netlink files in a 
single directory.
---
 datapath-windows/automake.mk   |   6 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 -
 datapath-windows/ovsext/Netlink.h  | 104 --
 datapath-windows/ovsext/Netlink/Netlink.c  | 469 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 104 ++
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 116 ++
 datapath-windows/ovsext/NetlinkProto.h | 116 --
 datapath-windows/ovsext/ovsext.vcxproj |   8 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 10 files changed, 699 insertions(+), 699 deletions(-)  delete mode 100644 
datapath-windows/ovsext/Netlink.c  delete mode 100644 
datapath-windows/ovsext/Netlink.h  create mode 100644 
datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkProto.h
 delete mode 100644 datapath-windows/ovsext/NetlinkProto.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1

-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"

 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink.c
deleted file mode 100644
index 90a633b..000
--- a/datapath-windows/ovsext/Netlink.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * 
https://urldefense.proofpoint.com/v1/url?u=http://www.apache.org/licenses/LICENSE-2.0&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=f6EhnZ0ORGZNt5QbYmRaOxfWfx%2Bqd3KEiPf3%2FYaollU%3D%0A&m=NApZCLVrk%2BNJghlUzWylWRAfuowX6mDyk5J1AmnytQI%3D%0A&s=1aab2a1fea4277bc891aed636302ce8d5f5497e8747ca43e64714709e829ab37
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "precomp.h"
-#include "NetlinkProto.h"
-#include "Netlink.h"
-
-#ifdef OVS_DBG_MOD
-#undef OVS_DBG_MOD
-#endif
-#define OVS_DBG_MOD OVS_DBG_NETLINK
-#include "Debug.h"
-
-/*
- * --

Re: [ovs-dev] [patch net-next 10/13] openvswitch: add support for datapath hardware offload

2014-09-03 Thread John Fastabend

On 09/03/2014 02:24 AM, Jiri Pirko wrote:

Benefit from the possibility to work with flows in switch devices and
use the swdev api to offload flow datapath.

Signed-off-by: Jiri Pirko 
---
  net/openvswitch/Makefile   |   3 +-
  net/openvswitch/datapath.c |  33 ++
  net/openvswitch/datapath.h |   3 +
  net/openvswitch/flow_table.c   |   1 +
  net/openvswitch/hw_offload.c   | 245 +
  net/openvswitch/hw_offload.h   |  22 
  net/openvswitch/vport-netdev.c |   3 +
  net/openvswitch/vport.h|   2 +
  8 files changed, 311 insertions(+), 1 deletion(-)
  create mode 100644 net/openvswitch/hw_offload.c
  create mode 100644 net/openvswitch/hw_offload.h

diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
index 3591cb5..5152437 100644
--- a/net/openvswitch/Makefile
+++ b/net/openvswitch/Makefile
@@ -13,7 +13,8 @@ openvswitch-y := \
flow_table.o \
vport.o \
vport-internal_dev.o \
-   vport-netdev.o
+   vport-netdev.o \
+   hw_offload.o

  ifneq ($(CONFIG_OPENVSWITCH_VXLAN),)
  openvswitch-y += vport-vxlan.o
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 75bb07f..3e43e1d 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -57,6 +57,7 @@
  #include "flow_netlink.h"
  #include "vport-internal_dev.h"
  #include "vport-netdev.h"
+#include "hw_offload.h"

  int ovs_net_id __read_mostly;

@@ -864,6 +865,9 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
acts = NULL;
goto err_unlock_ovs;
}
+   error = ovs_hw_flow_insert(dp, new_flow);
+   if (error)
+   pr_warn("failed to insert flow into hw\n");


This is really close to silently failing. I think we need to
hard fail here somehow and push it back to userspace as part of
the reply and ovs_notify.

Otherwise I don't know how to manage the hardware correctly. Consider
the hardware table is full. In this case user space will continue to
add rules and they will be silently discarded. Similarly if user space
adds a flow/action that can not be supported by the hardware it will
be silently ignored.

Even if we do careful accounting on resources in user space we could
still get an ENOMEM error from sw_flow_action_create.

Same comment for the other hw commands flush/remove.


if (unlikely(reply)) {
error = ovs_flow_cmd_fill_info(new_flow,
@@ -896,10 +900,18 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
goto err_unlock_ovs;
}
}



[...]


Thanks,
John

--
John Fastabend Intel Corporation
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] OVS-on-HyperV: Agenda for IRC meeting on 9/3

2014-09-03 Thread Eitan Eliahu


Hi, here are some items we would like to follow up on. Please feel free to add 
anything you would like to discuss.


NetLink
--
i) #Action-item: Alin to send out patches for dpif-linux.c soon.
 ii) Sam will be working on the vport commands - dump and add to start with, 
and the rest later.
 iii) Ankur will be working on the flow commands - dump, put, flush.
 iv) Created an issue for packet execute (issue #44)


 Events discussion:
-
Eitan led the discussion on events with Alin and Samuel, and we have agreement 
on how we are going to go about it. Eitan will work on some preliminary patches 
in userspace, and hand it off to Alin. After that, Eitan will focus on the 
kernel changes, and work with Alin on getting the integration completed.

 Packet receive:
--
Packet receive functionality implementation can possibly go along with the 
Events implementation, but we'll wait till Eitan takes the first stab at the 
changes so we don't have to re-invent the wheel.

Thanks,
Eitan
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Alin Serdean
Please. 

I will look over the other patches after the meeting :).

Alin.

-Mesaj original-
De la: Ankur Sharma [mailto:ankursha...@vmware.com] 
Trimis: Wednesday, September 3, 2014 7:34 PM
Către: Alin Serdean; dev@openvswitch.org
Subiect: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Alin,

Sure. Sorry i missed that.
Do you want me to spin up a v2 with -M enabled in format-patch?

Thanks.

Regards,
Ankur

From: Alin Serdean 
Sent: Wednesday, September 3, 2014 9:29 AM
To: Ankur Sharma; dev@openvswitch.org
Subject: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Ankur,

Just a small nit I you use "git format-patch" could you also add the -M option?

Thank you,
Alin.

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Wednesday, September 3, 2014 4:04 AM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to 
a new directory.

In this change we have created a new directory named Netlink inside 
datapath-windows/ovsext/. This directory will be used to keep all the netlink 
related files.

The reason we have created new directory is that for 'put' related APIs we will 
be adding netlink buffer mgmt files as well. These files will take the count of 
netlink related files to 5. Hence we decided to club the netlink files in a 
single directory.
---
 datapath-windows/automake.mk   |   6 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 -
 datapath-windows/ovsext/Netlink.h  | 104 --
 datapath-windows/ovsext/Netlink/Netlink.c  | 469 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 104 ++
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 116 ++
 datapath-windows/ovsext/NetlinkProto.h | 116 --
 datapath-windows/ovsext/ovsext.vcxproj |   8 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 10 files changed, 699 insertions(+), 699 deletions(-)  delete mode 100644 
datapath-windows/ovsext/Netlink.c  delete mode 100644 
datapath-windows/ovsext/Netlink.h  create mode 100644 
datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkProto.h
 delete mode 100644 datapath-windows/ovsext/NetlinkProto.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1

-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"

 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink.c
deleted file mode 100644
index 90a633b..000
--- a/datapath-windows/ovsext/Netlink.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at:
- *
- * 
https://urldefense.proofpoint.com/v1/url?u=http://www.apache.org/licenses/LICENSE-2.0&k=oIvRg1%2BdGAgOoM1BIlLLqw%3D%3D%0A&r=f6EhnZ0ORGZNt5QbYmRaOxfWfx%2Bqd3KEiPf3%2FYaollU%3D%0A&m=NApZCLVrk%2BNJghlUzWylWRAfuowX6mDyk5J1AmnytQI%3D%0A&s=1aab2a1fea4277bc891aed636302ce8d5f5497e8747ca43e64714709e829ab37
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY 

Re: [ovs-dev] [Patch] Documentation for DPDK IVSHMEM VM Communications

2014-09-03 Thread Flavio Leitner
On Fri, Aug 29, 2014 at 03:54:08PM -0700, Pravin Shelar wrote:
> On Fri, Aug 15, 2014 at 7:07 AM, Polehn, Mike A  
> wrote:
> > Adds documentation on how to run IVSHMEM communication through VM.
> >
> I think INSTALL.DPDK is getting rather large and hard to understand
> with all details.
> so I dropped "Alternative method to get QEMU, download and build from
> OVDK" section.
> We can add this documentation to separate file once vhost support is added.

It's better to have extra info than no info at all.

Since all these features are recent, it's hard to find any info out
there, so please keep it.

fbl
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] Makefiles: Add $(AM_V_GEN) annotations to clean up "make" output.

2014-09-03 Thread Ben Pfaff
The Open vSwitch "make" output was still pretty verbose even when
configured with --enable-silent-rules.  This cleans it up.

Signed-off-by: Ben Pfaff 
---
I tested that this yields good output with the following travis build:
https://travis-ci.org/openvswitch/ovs/builds/34310033
Have I mentioned that travis is awesome?

 Makefile.am  |4 +--
 datapath-windows/include/automake.mk |2 +-
 debian/automake.mk   |1 +
 include/automake.mk  |2 +-
 include/openflow/automake.mk |2 +-
 lib/automake.mk  |   22 -
 ofproto/automake.mk  |4 +--
 ovsdb/automake.mk|6 ++---
 python/automake.mk   |8 +++---
 rhel/automake.mk |4 +--
 tests/automake.mk|   45 --
 vswitchd/automake.mk |8 +++---
 vtep/automake.mk |8 +++---
 xenserver/automake.mk|4 +--
 14 files changed, 63 insertions(+), 57 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 3c2ae89..38055bf 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -134,7 +134,7 @@ ro_shell = printf '\043 Generated automatically -- do not 
modify!-*- buffer-
 
 SUFFIXES += .in
 .in:
-   $(PERL) $(srcdir)/build-aux/soexpand.pl -I$(srcdir) < $< | \
+   $(AM_V_GEN)$(PERL) $(srcdir)/build-aux/soexpand.pl -I$(srcdir) < $< | \
sed \
-e 's,[@]PKIDIR[@],$(PKIDIR),g' \
 -e 's,[@]LOGDIR[@],$(LOGDIR),g' \
@@ -153,7 +153,7 @@ SUFFIXES += .in
@if head -n 1 $@.tmp | grep '#!' > /dev/null; then \
chmod +x $@.tmp; \
fi
-   $(AM_V_GEN) mv $@.tmp $@
+   $(AM_V_at) mv $@.tmp $@
 
 .PHONY: clean-pycov
 clean-pycov:
diff --git a/datapath-windows/include/automake.mk 
b/datapath-windows/include/automake.mk
index ac945cb..31f2fe2 100644
--- a/datapath-windows/include/automake.mk
+++ b/datapath-windows/include/automake.mk
@@ -3,7 +3,7 @@ BUILT_SOURCES += 
$(srcdir)/datapath-windows/include/OvsDpInterface.h
 $(srcdir)/datapath-windows/include/OvsDpInterface.h: \
  datapath/linux/compat/include/linux/openvswitch.h \
  build-aux/extract-odp-netlink-windows-dp-h
-   sed -f $(srcdir)/build-aux/extract-odp-netlink-windows-dp-h < $< > $@
+   $(AM_V_GEN)sed -f $(srcdir)/build-aux/extract-odp-netlink-windows-dp-h 
< $< > $@
 
 EXTRA_DIST += $(srcdir)/build-aux/extract-odp-netlink-windows-dp-h
 
diff --git a/debian/automake.mk b/debian/automake.mk
index 86c1310..c29a560 100644
--- a/debian/automake.mk
+++ b/debian/automake.mk
@@ -71,6 +71,7 @@ ALL_LOCAL += check-debian-changelog-version
 DIST_HOOKS += check-debian-changelog-version
 
 $(srcdir)/debian/copyright: AUTHORS debian/copyright.in
+   $(AM_V_GEN) \
{ sed -n -e '/%AUTHORS%/q' -e p < $(srcdir)/debian/copyright.in;   \
  sed '1,/^$$/d' $(srcdir)/AUTHORS |   \
sed -n -e '/^$$/q' -e 's/^/  /p';  \
diff --git a/include/automake.mk b/include/automake.mk
index 55cb353..6a4cf86 100644
--- a/include/automake.mk
+++ b/include/automake.mk
@@ -2,7 +2,7 @@ BUILT_SOURCES += include/odp-netlink.h
 
 include/odp-netlink.h: datapath/linux/compat/include/linux/openvswitch.h \
build-aux/extract-odp-netlink-h
-   sed -f $(srcdir)/build-aux/extract-odp-netlink-h < $< > $@
+   $(AM_V_GEN)sed -f $(srcdir)/build-aux/extract-odp-netlink-h < $< > $@
 EXTRA_DIST += build-aux/extract-odp-netlink-h
 CLEANFILES += include/odp-netlink.h
 
diff --git a/include/openflow/automake.mk b/include/openflow/automake.mk
index 2938642..14f7a47 100644
--- a/include/openflow/automake.mk
+++ b/include/openflow/automake.mk
@@ -12,7 +12,7 @@ if HAVE_PYTHON
 SUFFIXES += .h .hstamp
 
 .h.hstamp:
-   $(run_python) $(srcdir)/build-aux/check-structs -I$(srcdir)/include $<
+   $(AM_V_GEN)$(run_python) $(srcdir)/build-aux/check-structs 
-I$(srcdir)/include $< && \
touch $@
 
 HSTAMP_FILES = \
diff --git a/lib/automake.mk b/lib/automake.mk
index d46613f..83f6855 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -356,11 +356,11 @@ if HAVE_OPENSSL
 lib_libopenvswitch_la_SOURCES += lib/stream-ssl.c
 nodist_lib_libopenvswitch_la_SOURCES += lib/dhparams.c
 lib/dhparams.c: lib/dh1024.pem lib/dh2048.pem lib/dh4096.pem
-   (echo '#include "lib/dhparams.h"' &&\
+   $(AM_V_GEN)(echo '#include "lib/dhparams.h"' && \
 openssl dhparam -C -in $(srcdir)/lib/dh1024.pem -noout &&  \
 openssl dhparam -C -in $(srcdir)/lib/dh2048.pem -noout &&  \
 openssl dhparam -C -in $(srcdir)/lib/dh4096.pem -noout)\
-   | sed 's/\(get_dh[0-9]*\)()/\1(void)/' > lib/dhparams.c.tmp
+   | sed 's/\(get_dh[0-9]*\)()/\1(void)/' > lib/dhparams.c.tmp &&  \
mv lib/dhparams.c.t

Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko  wrote:
> After this, flow related structures can be used in other code.
>
> Signed-off-by: Jiri Pirko 
> ---
>  include/net/sw_flow.h  |  99 ++
>  net/openvswitch/actions.c  |   3 +-
>  net/openvswitch/datapath.c |  74 +-
>  net/openvswitch/datapath.h |   4 +-
>  net/openvswitch/flow.c |   6 +--
>  net/openvswitch/flow.h | 102 +++
>  net/openvswitch/flow_netlink.c |  53 +-
>  net/openvswitch/flow_netlink.h |  10 ++--
>  net/openvswitch/flow_table.c   | 118 
> ++---
>  net/openvswitch/flow_table.h   |  30 +--
>  net/openvswitch/vport-gre.c|   4 +-
>  net/openvswitch/vport-vxlan.c  |   2 +-
>  net/openvswitch/vport.c|   2 +-
>  net/openvswitch/vport.h|   2 +-
>  14 files changed, 276 insertions(+), 233 deletions(-)
>  create mode 100644 include/net/sw_flow.h
>
> diff --git a/include/net/sw_flow.h b/include/net/sw_flow.h
> new file mode 100644
> index 000..21724f1
> --- /dev/null
> +++ b/include/net/sw_flow.h
> @@ -0,0 +1,99 @@
> +/*
> + * include/net/sw_flow.h - Generic switch flow structures
> + * Copyright (c) 2007-2012 Nicira, Inc.
> + * Copyright (c) 2014 Jiri Pirko 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License as published by
> + * the Free Software Foundation; either version 2 of the License, or
> + * (at your option) any later version.
> + */
> +
> +#ifndef _NET_SW_FLOW_H_
> +#define _NET_SW_FLOW_H_
> +
> +struct sw_flow_key_ipv4_tunnel {
> +   __be64 tun_id;
> +   __be32 ipv4_src;
> +   __be32 ipv4_dst;
> +   __be16 tun_flags;
> +   u8   ipv4_tos;
> +   u8   ipv4_ttl;
> +};
> +
> +struct sw_flow_key {
> +   struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel key. 
> */
> +   struct {
> +   u32 priority;   /* Packet QoS priority. */
> +   u32 skb_mark;   /* SKB mark. */
> +   u16 in_port;/* Input switch port (or 
> DP_MAX_PORTS). */
> +   } __packed phy; /* Safe when right after 'tun_key'. */
> +   struct {
> +   u8 src[ETH_ALEN];   /* Ethernet source address. */
> +   u8 dst[ETH_ALEN];   /* Ethernet destination address. */
> +   __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set 
> otherwise. */
> +   __be16 type;/* Ethernet frame type. */
> +   } eth;
> +   struct {
> +   u8 proto;   /* IP protocol or lower 8 bits of ARP 
> opcode. */
> +   u8 tos; /* IP ToS. */
> +   u8 ttl; /* IP TTL/hop limit. */
> +   u8 frag;/* One of OVS_FRAG_TYPE_*. */
> +   } ip;
> +   struct {
> +   __be16 src; /* TCP/UDP/SCTP source port. */
> +   __be16 dst; /* TCP/UDP/SCTP destination port. */
> +   __be16 flags;   /* TCP flags. */
> +   } tp;
> +   union {
> +   struct {
> +   struct {
> +   __be32 src; /* IP source address. */
> +   __be32 dst; /* IP destination address. */
> +   } addr;
> +   struct {
> +   u8 sha[ETH_ALEN];   /* ARP source 
> hardware address. */
> +   u8 tha[ETH_ALEN];   /* ARP target 
> hardware address. */
> +   } arp;
> +   } ipv4;
> +   struct {
> +   struct {
> +   struct in6_addr src;/* IPv6 source 
> address. */
> +   struct in6_addr dst;/* IPv6 destination 
> address. */
> +   } addr;
> +   __be32 label;   /* IPv6 flow label. */
> +   struct {
> +   struct in6_addr target; /* ND target address. 
> */
> +   u8 sll[ETH_ALEN];   /* ND source link 
> layer address. */
> +   u8 tll[ETH_ALEN];   /* ND target link 
> layer address. */
> +   } nd;
> +   } ipv6;
> +   };
> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as longs. 
> */
> +

The HW offload API should be separate from the OVS module. This has the
following advantages.
1. It can be managed by the OVS userspace vswitchd process, which has much
better context to set up the hardware flow table. Once we add capabilities
for swdev, it is much easier for the vswitchd process to choose the
correct (hw or sw) flow table for a given flow.
2. Other application that wants to use HW offload does not have

Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 8:20 AM, John Fastabend  wrote:
> On 09/03/2014 02:24 AM, Jiri Pirko wrote:
>>
>> After this, flow related structures can be used in other code.
>>
>> Signed-off-by: Jiri Pirko 
>> ---
>
>
> Hi Jiri,
>
> As I indicated before I'm looking into integrating this with some
> hardware here. Progress is a bit slow but starting to look at it. The
> i40e/ixgbe driver being one open source example with very limited
> support for tables, flow matches, etc. And then a closed source driver
> with much more flexibility. What I don't have is a middle of the road
> switch to work with something better than a host nic but not as
> flexible as a TOR.
>
> Couple questions my assumption here is I can extend the flow_key
> as needed to support additional match criteria my hardware has.
> I scanned the ./net/openvswitch source and I didn't catch any
> place that would break but might need to take a closer look.
> Similarly the actions set will need to be extended. For example
> if I want to use this with i40e a OVS_ACTION_ATTR_QUEUE could
> be used to steer packets to the queue. With this in mind we
> will want a follow up patch to rename OVS_ACTION_ATTR_* to
> FLOW_ACTION_ATTR_*
>

struct sw_flow_key is an internal structure of OVS; it is designed to
give better flow-table performance. Adding hw specific fields to
sw_flow_key increases the flow-key size, and that has a negative impact on
OVS software switching performance. Therefore it is better not to
share this internal structure with the driver interface.

Thanks.

> Also I have some filters that can match on offset/length/mask
> tuples. As far as I can tell this is going to have to be yet
> another interface? Or would it be worth the effort to define
> the flow key more generically. My initial guess is I'll just
> write a separate interface. I think this is what Jamal referred
> to as another "classifier".
>
> Thanks,
> John
>
> [...]
>
>
>> +
>> +struct sw_flow_key_ipv4_tunnel {
>> +   __be64 tun_id;
>> +   __be32 ipv4_src;
>> +   __be32 ipv4_dst;
>> +   __be16 tun_flags;
>> +   u8   ipv4_tos;
>> +   u8   ipv4_ttl;
>> +};
>> +
>> +struct sw_flow_key {
>> +   struct sw_flow_key_ipv4_tunnel tun_key;  /* Encapsulating tunnel
>> key. */
>> +   struct {
>> +   u32 priority;   /* Packet QoS priority. */
>> +   u32 skb_mark;   /* SKB mark. */
>> +   u16 in_port;/* Input switch port (or
>> DP_MAX_PORTS). */
>> +   } __packed phy; /* Safe when right after 'tun_key'. */
>> +   struct {
>> +   u8 src[ETH_ALEN];   /* Ethernet source address. */
>> +   u8 dst[ETH_ALEN];   /* Ethernet destination address.
>> */
>> +   __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT
>> set otherwise. */
>> +   __be16 type;/* Ethernet frame type. */
>> +   } eth;
>> +   struct {
>> +   u8 proto;   /* IP protocol or lower 8 bits of
>> ARP opcode. */
>> +   u8 tos; /* IP ToS. */
>> +   u8 ttl; /* IP TTL/hop limit. */
>> +   u8 frag;/* One of OVS_FRAG_TYPE_*. */
>> +   } ip;
>> +   struct {
>> +   __be16 src; /* TCP/UDP/SCTP source port. */
>> +   __be16 dst; /* TCP/UDP/SCTP destination port.
>> */
>> +   __be16 flags;   /* TCP flags. */
>> +   } tp;
>> +   union {
>> +   struct {
>> +   struct {
>> +   __be32 src; /* IP source address. */
>> +   __be32 dst; /* IP destination address.
>> */
>> +   } addr;
>> +   struct {
>> +   u8 sha[ETH_ALEN];   /* ARP source
>> hardware address. */
>> +   u8 tha[ETH_ALEN];   /* ARP target
>> hardware address. */
>> +   } arp;
>> +   } ipv4;
>> +   struct {
>> +   struct {
>> +   struct in6_addr src;/* IPv6 source
>> address. */
>> +   struct in6_addr dst;/* IPv6
>> destination address. */
>> +   } addr;
>> +   __be32 label;   /* IPv6 flow
>> label. */
>> +   struct {
>> +   struct in6_addr target; /* ND target
>> address. */
>> +   u8 sll[ETH_ALEN];   /* ND source link
>> layer address. */
>> +   u8 tll[ETH_ALEN];   /* ND target link
>> layer address. */
>> +   } nd;
>> +   } ipv6;
>> +   };
>> +} __aligned(BITS_PER_LONG/8); /* Ensure that we can do comparisons as
>> longs. */
>> +
>> +struct sw_flow_key_range {
>> +   unsigned short int start;
>> +   

[ovs-dev] [v5 1/2] datapath: Remove recirc stack depth limit check

2014-09-03 Thread Andy Zhou
Future patches will change the recirc action implementation to not
use recursion. The stack depth detection is no longer necessary.

Signed-off-by: Andy Zhou 

---
v4->v5:  no change
---
 datapath/actions.c  | 63 -
 datapath/datapath.c |  6 ++---
 datapath/datapath.h |  4 ++--
 datapath/vport.c|  2 +-
 4 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 43ca2a0..0a22e55 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -831,7 +831,7 @@ static int execute_recirc(struct datapath *dp, struct 
sk_buff *skb,
}
 
flow_key_set_recirc_id(skb, nla_get_u32(a));
-   ovs_dp_process_packet(skb, true);
+   ovs_dp_process_packet(skb);
return 0;
 }
 
@@ -924,63 +924,8 @@ static int do_execute_actions(struct datapath *dp, struct 
sk_buff *skb,
return 0;
 }
 
-/* We limit the number of times that we pass into execute_actions()
- * to avoid blowing out the stack in the event that we have a loop.
- *
- * Each loop adds some (estimated) cost to the kernel stack.
- * The loop terminates when the max cost is exceeded.
- * */
-#define RECIRC_STACK_COST 1
-#define DEFAULT_STACK_COST 4
-/* Allow up to 4 regular services, and up to 3 recirculations */
-#define MAX_STACK_COST (DEFAULT_STACK_COST * 4 + RECIRC_STACK_COST * 3)
-
-struct loop_counter {
-   u8 stack_cost;  /* loop stack cost. */
-   bool looping;   /* Loop detected? */
-};
-
-static DEFINE_PER_CPU(struct loop_counter, loop_counters);
-
-static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
-{
-   if (net_ratelimit())
-   pr_warn("%s: flow loop detected, dropping\n",
-   ovs_dp_name(dp));
-   actions->actions_len = 0;
-   return -ELOOP;
-}
-
 /* Execute a list of actions against 'skb'. */
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
-   struct sw_flow_actions *acts, bool recirc)
-{
-   const u8 stack_cost = recirc ? RECIRC_STACK_COST : DEFAULT_STACK_COST;
-   struct loop_counter *loop;
-   int error;
-
-   /* Check whether we've looped too much. */
-   loop = &__get_cpu_var(loop_counters);
-   loop->stack_cost += stack_cost;
-   if (unlikely(loop->stack_cost > MAX_STACK_COST))
-   loop->looping = true;
-   if (unlikely(loop->looping)) {
-   error = loop_suppress(dp, acts);
-   kfree_skb(skb);
-   goto out_loop;
-   }
-
-   error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);
-
-   /* Check whether sub-actions looped too much. */
-   if (unlikely(loop->looping))
-   error = loop_suppress(dp, acts);
-
-out_loop:
-   /* Decrement loop stack cost. */
-   loop->stack_cost -= stack_cost;
-   if (!loop->stack_cost)
-   loop->looping = false;
-
-   return error;
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, struct 
sw_flow_actions *acts)
+{
+   return do_execute_actions(dp, skb, acts->actions, acts->actions_len);
 }
diff --git a/datapath/datapath.c b/datapath/datapath.c
index b6eadef..a668222 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -251,7 +251,7 @@ void ovs_dp_detach_port(struct vport *p)
 }
 
 /* Must be called with rcu_read_lock. */
-void ovs_dp_process_packet(struct sk_buff *skb, bool recirc)
+void ovs_dp_process_packet(struct sk_buff *skb)
 {
const struct vport *p = OVS_CB(skb)->input_vport;
struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key;
@@ -283,7 +283,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, bool recirc)
ovs_flow_stats_update(flow, pkt_key->tp.flags, skb);
 
sf_acts = rcu_dereference(flow->sf_acts);
-   ovs_execute_actions(dp, skb, sf_acts, recirc);
+   ovs_execute_actions(dp, skb, sf_acts);
stats_counter = &stats->n_hit;
 
 out:
@@ -581,7 +581,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
 
local_bh_disable();
-   err = ovs_execute_actions(dp, packet, sf_acts, false);
+   err = ovs_execute_actions(dp, packet, sf_acts);
local_bh_enable();
rcu_read_unlock();
 
diff --git a/datapath/datapath.h b/datapath/datapath.h
index e414225..eba2fc4 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -188,7 +188,7 @@ extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 extern struct genl_multicast_group ovs_dp_vport_multicast_group;
 
-void ovs_dp_process_packet(struct sk_buff *, bool recirc);
+void ovs_dp_process_packet(struct sk_buff *c);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
  const struct dp_upcall_info *);
@@ -198,7 +198,7 @@ struct sk_buff *ovs_vport_cmd_build_info(s

[ovs-dev] [v5 2/2] datapath: Implement recirc action without recursion

2014-09-03 Thread Andy Zhou
Since the kernel stack is limited in size, it is not wise to use
recursive functions with large stack frames.

This patch provides an alternative implementation of recirc action
without using recursion.

A per-CPU, fixed-size 'deferred action FIFO' is used to store either
recirc or sample actions encountered during execution of an action
list. Not executing recirc or sample actions in place, but rather executing
them later as 'deferred actions', avoids recursion.

Deferred actions are only executed after all other actions have been
executed, including the ones triggered by loopback from the kernel
network stack.

The size of the private FIFO, currently set to 20, limits the number
of total 'deferred actions' any one packet can accumulate.

Signed-off-by: Andy Zhou 

---
v4->v5:
Reset fifo after processing deferred actions
move private data structures from actions.h to actions.c
remove action_fifo init functions, since default percpu data
   will be zero.
---
 datapath/Modules.mk |   1 +
 datapath/actions.c  | 175 
 datapath/actions.h  |  31 ++
 datapath/datapath.c |   1 +
 datapath/datapath.h |   4 +-
 5 files changed, 197 insertions(+), 15 deletions(-)
 create mode 100644 datapath/actions.h

diff --git a/datapath/Modules.mk b/datapath/Modules.mk
index 90e158c..2e74f6e 100644
--- a/datapath/Modules.mk
+++ b/datapath/Modules.mk
@@ -23,6 +23,7 @@ openvswitch_sources = \
 
 openvswitch_headers = \
compat.h \
+   actions.h \
datapath.h \
flow.h \
flow_netlink.h \
diff --git a/datapath/actions.c b/datapath/actions.c
index 0a22e55..6ad5bbe 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -39,6 +39,74 @@
 #include "mpls.h"
 #include "vlan.h"
 #include "vport.h"
+#include "actions.h"
+
+struct ovs_deferred_action {
+   struct sk_buff *skb;
+   const struct nlattr *actions;
+
+   /* Store pkt_key clone when creating deferred action. */
+   struct sw_flow_key pkt_key;
+};
+
+#define OVS_DEFERRED_ACTION_FIFO_SIZE 20
+struct ovs_action_fifo {
+   int head;
+   int tail;
+   /* Deferred action fifo queue storage. */
+   struct ovs_deferred_action fifo[OVS_DEFERRED_ACTION_FIFO_SIZE];
+};
+
+static DEFINE_PER_CPU(struct ovs_action_fifo, action_fifos);
+#define OVS_EXEC_ACTIONS_COUNT_LIMIT 4   /* limit used to detect packet
+   looping by the network stack */
+static DEFINE_PER_CPU(int, ovs_exec_actions_count);
+
+static inline void action_fifo_init(struct ovs_action_fifo *fifo)
+{
+   fifo->head = 0;
+   fifo->tail = 0;
+}
+
+static inline bool action_fifo_is_empty(struct ovs_action_fifo *fifo)
+{
+   return (fifo->head == fifo->tail);
+}
+
+static inline struct ovs_deferred_action *
+action_fifo_get(struct ovs_action_fifo *fifo)
+{
+   if (action_fifo_is_empty(fifo))
+   return NULL;
+
+   return &fifo->fifo[fifo->tail++];
+}
+
+static inline struct ovs_deferred_action *
+action_fifo_put(struct ovs_action_fifo *fifo)
+{
+   if (fifo->head >= OVS_DEFERRED_ACTION_FIFO_SIZE - 1)
+   return NULL;
+
+   return &fifo->fifo[fifo->head++];
+}
+
+static inline struct ovs_deferred_action *
+add_deferred_actions(struct sk_buff *skb, const struct nlattr *attr)
+{
+   struct ovs_action_fifo *fifo;
+   struct ovs_deferred_action *da;
+
+   fifo = this_cpu_ptr(&(action_fifos));
+   da = action_fifo_put(fifo);
+
+   if (da) {
+   da->skb = skb;
+   da->actions = attr;
+   }
+
+   return da;
+}
 
 static void flow_key_clone(struct sk_buff *skb, struct sw_flow_key *new_key)
 {
@@ -689,9 +757,9 @@ static bool last_action(const struct nlattr *a, int rem)
 static int sample(struct datapath *dp, struct sk_buff *skb,
  const struct nlattr *attr)
 {
-   struct sw_flow_key sample_key;
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
+   struct ovs_deferred_action *da;
int rem;
 
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
@@ -728,10 +796,19 @@ static int sample(struct datapath *dp, struct sk_buff 
*skb,
/* Skip the sample action when out of memory. */
return 0;
 
-   flow_key_clone(skb, &sample_key);
+   da = add_deferred_actions(skb, a);
+   if (!da) {
+   if (net_ratelimit())
+   pr_warn("%s: deferred actions limit reached, dropping 
sample action\n",
+   ovs_dp_name(dp));
 
-   /* do_execute_actions() will consume the cloned skb. */
-   return do_execute_actions(dp, skb, a, rem);
+   kfree_skb(skb);
+   return 0;
+   }
+
+   flow_key_clone(skb, &da->pkt_key);
+
+   return 0;
 }
 
 static void execute_hash(struct sk_buff *skb, const struct nlattr *attr)
@@ -750,7 +827,7 @@ static void execute_hash(struct sk_buff *

Re: [ovs-dev] [PATCH 5/5] Netlink.c: Added netlink put APIs.

2014-09-03 Thread Alin Serdean
Acked-by: Alin Gabriel Serdean 

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Tuesday, September 2, 2014 6:05 PM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 5/5] Netlink.c: Added netlink put APIs.

In this change we have added the APIs for putting netlink headers, attributes 
in a buffer.

The buffer is managed through NetlinkBuf.[c|h].
---
 datapath-windows/ovsext/Netlink/Netlink.c | 403 +- 
 datapath-windows/ovsext/Netlink/Netlink.h |  28 +++
 2 files changed, 427 insertions(+), 4 deletions(-)

diff --git a/datapath-windows/ovsext/Netlink/Netlink.c 
b/datapath-windows/ovsext/Netlink/Netlink.c
index 90a633b..8c6b139 100644
--- a/datapath-windows/ovsext/Netlink/Netlink.c
+++ b/datapath-windows/ovsext/Netlink/Netlink.c
@@ -24,6 +24,402 @@
 #define OVS_DBG_MOD OVS_DBG_NETLINK
 #include "Debug.h"
 
+/* 
+===
+===
+ * This file provides simple netlink get, put and validation APIs.
+ * Most of the code is on similar lines as userspace netlink implementation.
+ *
+ * TODO: Convert these methods to inline.
+ * 
+===
+===
+ */
+
+/*
+ * 
+---
+
+ * Adds Netlink Header to the NL_BUF.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutNlHdr(PNL_BUF buf, PNL_MSG_HDR nlMsg) {
+if ((NlBufCopyAtOffset(buf, (PCHAR)nlMsg, NLMSG_HDRLEN, 0))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * Adds Genl Header to the NL_BUF.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutGenlHdr(PNL_BUF buf, PGENL_MSG_HDR genlMsg) {
+if ((NlBufCopyAtOffset(buf, (PCHAR)genlMsg, GENL_HDRLEN, NLMSG_HDRLEN))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * Adds OVS Header to the NL_BUF.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutOvsHdr(PNL_BUF buf, POVS_HDR ovsHdr) {
+if ((NlBufCopyAtOffset(buf, (PCHAR)ovsHdr, OVS_HDRLEN,
+   GENL_HDRLEN + NLMSG_HDRLEN))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * Adds data of length 'len' to the tail end of NL_BUF.
+ * Refer nl_msg_put for more details.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutTail(PNL_BUF buf, const PCHAR data, UINT32 len) {
+len = NLMSG_ALIGN(len);
+if (NlBufCopyAtTail(buf, data, len)) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * 
+---
+
+ * memsets length 'len' at tail end of NL_BUF.
+ * Refer nl_msg_put_uninit for more details.
+ * 
+---
+
+ */
+PCHAR
+NlMsgPutTailUninit(PNL_BUF buf, UINT32 len) {
+len = NLMSG_ALIGN(len);
+return NlBufCopyAtTailUninit(buf, len); }
+
+/*
+ * 
+---
+
+ * Adds an attribute to the tail end of buffer. It does
+ * not copy the attribute payload.
+ * Refer nl_msg_put_unspec_uninit for more details.
+ * 
+---
+
+ */
+PCHAR
+NlMsgPutTailUnspecUninit(PNL_BUF buf, UINT16 type, UINT16 len) {
+PCHAR ret = NULL;
+UINT16 totalLen = NLA_HDRLEN + len;
+PNL_ATTR nla = (PNL_ATTR)(NlMsgPutTailUninit(buf, totalLen));
+
+if (!nla) {
+goto done;
+}
+
+ret = (PCHAR)(nla + 1);
+nla->nlaLen = totalLen;
+nla->nlaType = type;
+
+done:
+return ret;
+}
+
+/*
+ * 
+---
+
+ * Adds an attribute to the tail end of buffer. It copies attribute
+ * payload as well.
+ * Refer nl_msg_put_unspec for more details.
+ * 
+---
+
+ */
+BOOLEAN
+NlMsgPutTailUnspec(PNL_BUF buf, UINT16 type, PCHAR data, UINT16 len) {
+BOOLEAN ret = TRUE;
+PCHAR nlaData = NlMsgPutTailUnspecUninit(buf, type, len);
+
+if (!nlaData) {
+ret = FALSE;
+goto done;
+}
+
+RtlCopyMemory(nlaData, data, len);
+
+done:
+return ret;
+}
+
+/*
+ * 
+---
+
+ * Adds an attribute of 'type' and no payload at the tail end of buffer.
+ * Refer nl_msg_put_flag for more details.
+ * 
+---
+---

Re: [ovs-dev] [PATCH 4/5] OvsTypes.h : Added support for BE16

2014-09-03 Thread Alin Serdean
Acked-by: Alin Gabriel Serdean 

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Tuesday, September 2, 2014 6:05 PM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 4/5] OvsTypes.h : Added support for BE16

---
 datapath-windows/ovsext/Types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datapath-windows/ovsext/Types.h b/datapath-windows/ovsext/Types.h 
index e48df7a..b2ef48c 100644
--- a/datapath-windows/ovsext/Types.h
+++ b/datapath-windows/ovsext/Types.h
@@ -31,6 +31,7 @@ typedef uint8 __u8;
 
 /* Defines the  userspace specific data types for file
  * included within kernel only. */
+typedef UINT16 BE16;
 typedef UINT32 BE32;
 typedef UINT64 BE64;
 
--
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 3/5] NetlinkProto.h: Minor fix for typos and new macro for padding.

2014-09-03 Thread Alin Serdean
Acked-by: Alin Gabriel Serdean 

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Tuesday, September 2, 2014 6:05 PM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 3/5] NetlinkProto.h: Minor fix for typos and new 
macro for padding.

Added a new macro for calculating the number of bytes required for padding. 
Fixed a minor typo.
---
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datapath-windows/ovsext/Netlink/NetlinkProto.h 
b/datapath-windows/ovsext/Netlink/NetlinkProto.h
index 898cc84..2c438a6 100644
--- a/datapath-windows/ovsext/Netlink/NetlinkProto.h
+++ b/datapath-windows/ovsext/Netlink/NetlinkProto.h
@@ -39,6 +39,9 @@
 /* Returns X rounded up to the nearest multiple of Y. */  #define ROUND_UP(X, 
Y) (DIV_ROUND_UP(X, Y) * (Y))
 
+/* Returns the least number that, when added to X, yields a multiple of 
+Y. */ #define PAD_SIZE(X, Y) (ROUND_UP(X, Y) - (X))
+
 /* Netlink message */
 
 /* nlmsg_flags bits. */
@@ -92,7 +95,7 @@ typedef struct _GENL_MSG_HDR {
 UINT8 cmd;
 UINT8 version;
 UINT16 reserved;
-} GENL_MSG_HDR, *PGENL_MDG_HDR;
+} GENL_MSG_HDR, *PGENL_MSG_HDR;
 BUILD_ASSERT_DECL(sizeof(GENL_MSG_HDR) == 4);
 
 /* Netlink attributes */
--
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/5] NetlinkBuf.c: Netlink buffer mgmt apis.

2014-09-03 Thread Alin Serdean
Maybe just rename NL_BUF to NL_BUFFER; otherwise I am fine with the patch.

Acked-by: Alin Gabriel Serdean 

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Tuesday, September 2, 2014 6:05 PM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 2/5] NetlinkBuf.c: Netlink buffer mgmt apis.

In this change we have introduced buffer mgmt apis which will be used while 
creating netlink messages. The basic functionality provided by apis is on 
similar lines to ofpbuf in userspace with an exception that it will not do run 
time buffer reallocation.

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
---
 datapath-windows/automake.mk |   2 +
 datapath-windows/ovsext/Netlink/NetlinkBuf.c | 349 +++ 
 datapath-windows/ovsext/Netlink/NetlinkBuf.h |  41 
 datapath-windows/ovsext/ovsext.vcxproj   |   2 +
 4 files changed, 394 insertions(+)
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.c
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
5ea0197..297a809 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -32,6 +32,8 @@ EXTRA_DIST += \
datapath-windows/ovsext/Jhash.h \
datapath-windows/ovsext/Netlink/Netlink.c \
datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.c \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.h \
datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
diff --git a/datapath-windows/ovsext/Netlink/NetlinkBuf.c 
b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
new file mode 100644
index 000..97436ea
--- /dev/null
+++ b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* 
+===
+===
+ * This is a simple buffer mangement framework specific for netlink protocol.
+ * The name could be confused with ovsext/BufferMgmt.c. 
+Ovsext/BufferMgmt.c
+ * deals with buffer mgmt for NBLs. Where as this framework deals with
+ * management of buffer that holds a netlink message.
+ *
+ * This framework provides APIs for putting/accessing data in a buffer. 
+These
+ * APIs are used by driver's netlink protocol implementation.
+ *
+ * We can see this framework as a subset of ofpbuf in ovs userspace.
+ *
+ * This framework is NOT a generic buffer management framework (ofpbuf
+ * is a generic buffer mgmt framework) and provides only the 
+functioanlities
+ * which would be useful for netlink protocol. Some of the key features are:
+ *
+ * a. It DOES NOT support automatic buffer reallocation.
+ *i. A netlink input/output message is a static buffer.
+ * b. The unused space is at the tail.
+ * c. There is no notion of headdroom.
+ * 
+===
+===
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_NETLINK
+#include "Debug.h"
+#include "NetlinkBuf.h"
+
+/* Returns used space in the buffer */
+#define NL_BUF_USED_SPACE(_buf)   (_buf->bufLen -\
+   _buf->bufRemLen)
+
+/* Validates that offset is within buffer boundaries and will not
+ * create holes in the buffer.*/
+#define NL_BUF_IS_VALID_OFFSET(_buf, _offset) (_offset <=\
+   
+NL_BUF_TAIL_OFFSET(_buf) ? 1 : 0)
+
+/* Validates if new data of size _size can be added at offset _offset.
+ * This macor assumes that offset validation has been done.*/
+#define NL_BUF_CAN_ADD(_buf, _size, _offset)  (((_offset + _size <=  \
+  _buf->bufLen) && (_size\
+  <= _buf->bufRemLen)) ? \
+  1 : 0)
+
+/* Returns the offset of tail wrt buffer head */
+#define NL_BUF_TAIL_OFFSET(_buf)  (_buf->tail - _buf->head)
+
+static __inline VOID
+_NlBufCopyAtTailUnsafe(PNL_BUF nlBu

Re: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Ankur Sharma
Hi Alin,

Thanks for reviewing the patches.
I'll spin up a v2 with following changes.

a. Use format-patch with -M for 1/5
b. Rename NL_BUF to NL_BUFFER.

Is it OK if I put your name along with Eitan's in Acked-by while submitting v2, or
would you like to review v2 first and then let me add your name to Acked-by?

Thanks.

Regards,
Ankur


From: Alin Serdean 
Sent: Wednesday, September 3, 2014 10:00 AM
To: Ankur Sharma; dev@openvswitch.org
Subject: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Please.

I will look over the other patches after the meeting :).

Alin.

-Mesaj original-
De la: Ankur Sharma [mailto:ankursha...@vmware.com]
Trimis: Wednesday, September 3, 2014 7:34 PM
Către: Alin Serdean; dev@openvswitch.org
Subiect: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Alin,

Sure. Sorry i missed that.
Do you want me to spin up a v2 with -M enabled in format-patch?

Thanks.

Regards,
Ankur

From: Alin Serdean 
Sent: Wednesday, September 3, 2014 9:29 AM
To: Ankur Sharma; dev@openvswitch.org
Subject: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Ankur,

Just a small nit: if you use "git format-patch", could you also add the -M option?

Thank you,
Alin.

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Wednesday, September 3, 2014 4:04 AM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to 
a new directory.

In this change we have created a new directory named Netlink inside 
datapath-windows/ovsext/. This directory will be used to keep all the netlink 
related files.

The reason we have created new directory is that for 'put' related APIs we will 
be adding netlink buffer mgmt files as well. These files will take the count of 
netlink related files to 5. Hence we decided to club the netlink files in a 
single directory.
---
 datapath-windows/automake.mk   |   6 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 -
 datapath-windows/ovsext/Netlink.h  | 104 --
 datapath-windows/ovsext/Netlink/Netlink.c  | 469 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 104 ++
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 116 ++
 datapath-windows/ovsext/NetlinkProto.h | 116 --
 datapath-windows/ovsext/ovsext.vcxproj |   8 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 10 files changed, 699 insertions(+), 699 deletions(-)  delete mode 100644 
datapath-windows/ovsext/Netlink.c  delete mode 100644 
datapath-windows/ovsext/Netlink.h  create mode 100644 
datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkProto.h
 delete mode 100644 datapath-windows/ovsext/NetlinkProto.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1

-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"

 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink.c
deleted file mode 100644
index 90a633b..000
--- a/datapath-windows/ovsext/Netlink.c
+++ /dev/null
@@ -1,469 +0,0 @@
-/*
- * Copyright (c) 2008, 2009, 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance

Re: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Alin Serdean
Sure be my guest :).

-Mesaj original-
De la: Ankur Sharma [mailto:ankursha...@vmware.com] 
Trimis: Wednesday, September 3, 2014 1:14 PM
Către: Alin Serdean; dev@openvswitch.org
Subiect: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Alin,

Thanks for reviewing the patches.
I'll spin up a v2 with following changes.

a. Use format-patch with -M for 1/5
b. Rename NL_BUF to NL_BUFFER.

Is it OK if I put your name along with Eitan's in Acked-by while submitting v2, or 
would you like to review v2 first and then let me add your name to Acked-by?

Thanks.

Regards,
Ankur


From: Alin Serdean 
Sent: Wednesday, September 3, 2014 10:00 AM
To: Ankur Sharma; dev@openvswitch.org
Subject: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Please.

I will look over the other patches after the meeting :).

Alin.

-Mesaj original-
De la: Ankur Sharma [mailto:ankursha...@vmware.com]
Trimis: Wednesday, September 3, 2014 7:34 PM
Către: Alin Serdean; dev@openvswitch.org
Subiect: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Alin,

Sure. Sorry i missed that.
Do you want me to spin up a v2 with -M enabled in format-patch?

Thanks.

Regards,
Ankur

From: Alin Serdean 
Sent: Wednesday, September 3, 2014 9:29 AM
To: Ankur Sharma; dev@openvswitch.org
Subject: RE: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files 
to a new directory.

Hi Ankur,

Just a small nit: if you use "git format-patch", could you also add the -M option?

Thank you,
Alin.

-Mesaj original-
De la: dev [mailto:dev-boun...@openvswitch.org] În numele Ankur Sharma
Trimis: Wednesday, September 3, 2014 4:04 AM
Către: dev@openvswitch.org
Subiect: [ovs-dev] [PATCH 1/5] datapath-windows/Netlink: Move netlink files to 
a new directory.

In this change we have created a new directory named Netlink inside 
datapath-windows/ovsext/. This directory will be used to keep all the netlink 
related files.

The reason we have created new directory is that for 'put' related APIs we will 
be adding netlink buffer mgmt files as well. These files will take the count of 
netlink related files to 5. Hence we decided to club the netlink files in a 
single directory.
---
 datapath-windows/automake.mk   |   6 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 -
 datapath-windows/ovsext/Netlink.h  | 104 --
 datapath-windows/ovsext/Netlink/Netlink.c  | 469 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 104 ++
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 116 ++
 datapath-windows/ovsext/NetlinkProto.h | 116 --
 datapath-windows/ovsext/ovsext.vcxproj |   8 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 10 files changed, 699 insertions(+), 699 deletions(-)  delete mode 100644 
datapath-windows/ovsext/Netlink.c  delete mode 100644 
datapath-windows/ovsext/Netlink.h  create mode 100644 
datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkProto.h
 delete mode 100644 datapath-windows/ovsext/NetlinkProto.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk index 
eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1

-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"

 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink.c
deleted file mode 100644
index

Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce port_try_ref() to prevent a race.

2014-09-03 Thread Alex Wang
After discussing with Pravin offline, it is better to first get the
reference
and then access 'port->netdev' for the pmd check.

So, use a nested if statement:

@@ -1864,20 +1874,27 @@ pmd_load_queues(struct pmd_thread *f,
 index = 0;

 CMAP_FOR_EACH (port, node, &f->dp->ports) {
-if (netdev_is_pmd(port->netdev)) {
-int i;
-
-for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
-if ((index % dp->n_pmd_threads) == id) {
-poll_list = xrealloc(poll_list, sizeof *poll
-
-port_ref(port);
-poll_list[poll_cnt].port = port;
-poll_list[poll_cnt].rx = port->rxq[i];
-poll_cnt++;
+/* Calls port_try_ref() to prevent the main thread
+ * from deleting the port. */
+if (port_try_ref(port)) {
+if (netdev_is_pmd(port->netdev)) {
+int i;
+
+for (i = 0; i < netdev_n_rxq(port->netdev); i++)
+if ((index % dp->n_pmd_threads) == id) {
+poll_list = xrealloc(poll_list,
+sizeof *poll_list * (pol
+
+port_ref(port);
+poll_list[poll_cnt].port = port;
+poll_list[poll_cnt].rx = port->rxq[i];
+poll_cnt++;
+}
+index++;
 }
-index++;
 }
+/* Unrefs the port_try_ref(). */
+port_unref(port);
 }

Will apply to master soon,


On Tue, Sep 2, 2014 at 9:39 PM, Alex Wang  wrote:

> > @@ -1864,7 +1874,10 @@ pmd_load_queues(struct pmd_thread *f,
>>  >  index = 0;
>> >
>> >  CMAP_FOR_EACH (port, node, &f->dp->ports) {
>> > -if (netdev_is_pmd(port->netdev)) {
>> > +/* Calls port_try_ref() to prevent the main thread
>> > + * from deleting the port. */
>> > +if (netdev_is_pmd(port->netdev)
>> > +&& port_try_ref(port)) {
>> >  int i;
>> >
>> port_try_ref() check should be first in the condition.
>>
>> Otherwise looks good.
>> Acked-by: Pravin B Shelar 
>>
>>
>
> Could you explain more about why?  if port_try_ref() is called first,
> then we will try reference the both dpdk and non_dpdk port.  And
> we need to un-reference the port outside the if statement.
>
> However, when we try to un-reference the port, we dont know if the
> reference succeeds.
>
>
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] ovs-atomics: Add atomic support for Windows.

2014-09-03 Thread Gurucharan Shetty
On Thu, Aug 28, 2014 at 4:39 PM, Jarno Rajahalme  wrote:
>
> On Aug 28, 2014, at 9:57 AM, Gurucharan Shetty  wrote:
>> diff --git a/lib/ovs-atomic-msvc.h b/lib/ovs-atomic-msvc.h
>> new file mode 100644
>> index 000..f357545
>> --- /dev/null
>> +++ b/lib/ovs-atomic-msvc.h
>> @@ -0,0 +1,370 @@
>> +/*
>> + * Copyright (c) 2014 Nicira, Inc.
>> + *
>> + * Licensed under the Apache License, Version 2.0 (the "License");
>> + * you may not use this file except in compliance with the License.
>> + * You may obtain a copy of the License at:
>> + *
>> + * http://www.apache.org/licenses/LICENSE-2.0
>> + *
>> + * Unless required by applicable law or agreed to in writing, software
>> + * distributed under the License is distributed on an "AS IS" BASIS,
>> + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>> + * See the License for the specific language governing permissions and
>> + * limitations under the License.
>> + */
>> +
>> +/* This header implements atomic operation primitives using pthreads. */
>> +#ifndef IN_OVS_ATOMIC_H
>> +#error "This header should only be included indirectly via ovs-atomic.h."
>> +#endif
>> +
>> +/* From msdn documentation: With Visual Studio 2003, volatile to volatile
>> +references are ordered; the compiler will not re-order volatile variable
>> +access. With Visual Studio 2005, the compiler also uses acquire semantics
>> +for read operations on volatile variables and release semantics for write
>> +operations on volatile variables (when supported by the CPU). */
>
> Is this still the same for MSVC 2012 or 2013?
There is no explicit documentation stating that, but looking at MSVC's
c++ c11 atomic implementation, it looks clear that it is the case. I
will add an explicit comment for this.


>> +
>> +static inline void
>> +atomic_compiler_barrier(memory_order order OVS_UNUSED)
>> +{
>> +_ReadWriteBarrier();
>> +}
>
> You could do “if (order > memory_order_consume) {“ to avoid unnecessary 
> barriers.
Okay
>
>> +
>> +static inline void
>> +atomic_thread_fence(memory_order order)
>> +{
>> +if (order == memory_order_seq_cst) {
>> +MemoryBarrier();
>> +}
>> +}
>
> This needs an “else { atomic_compiler_barrier(order); }"
Yes. I will fix it up.

>
>> +
>> +static inline void
>> +atomic_signal_fence(memory_order order)
>> +{
>> +atomic_compiler_barrier(order);
>> +}
>> +
>> +/* 1, 2 and 4 bytes loads and stores are atomic on aligned memory. In 
>> addition,
>> + * since the compiler automatically takes acquire and release semantics on
>> + * volatile variables, for any order lesser than 'memory_order_seq_cst', we
>> + * can directly assign or read values. */
>> +
>> +#define atomic_store32(DST, SRC, ORDER) \
>> +if (ORDER == memory_order_seq_cst) {\
>> +InterlockedExchange((int32_t volatile *) DST,   \
>> +   (int32_t ) SRC); \
>> +} else {\
>> +*DST = SRC; \
>> +}
>> +
>> +/* 64 bit reads and write are not atomic on x86.
>
> Just noticed this:
>
> “8.1.1 Guaranteed Atomic Operations
>
> The Intel486 processor (and newer processors since) guarantees that the 
> following
> basic memory operations will always be carried out atomically:
> - Reading or writing a byte
> - Reading or writing a word aligned on a 16-bit boundary
> - Reading or writing a doubleword aligned on a 32-bit boundary
>
> The Pentium processor (and newer processors since) guarantees that the 
> following
> additional memory operations will always be carried out atomically:
> - Reading or writing a quadword aligned on a 64-bit boundary
> - 16-bit accesses to uncached memory locations that fit within a 32-bit data 
> bus
>
> The P6 family processors (and newer processors since) guarantee that the 
> following
> additional memory operation will always be carried out atomically:
> - Unaligned 16-, 32-, and 64-bit accesses to cached memory that fit within a 
> cache line”
>
> So, it might be worth it to limit the support for i586, and check alignment 
> at run time to avoid the lock on 64-bit load and store. Or it may be that 
> InterlockedExchange64() already does that?
Okay. I will include this file only for >= i586. It is unlikely that
anyone still uses older machines than that. So it shouldn't be a
problem. I also see with sample programs that 64 bit variables are 64
bit aligned even with 32 bit builds. But to make sure that it is
always the case, I will add an abort() if it is not 64 bit aligned.

>> +
>> +#define atomic_read32(SRC, DST, ORDER)\
>> +if (ORDER == memory_order_seq_cst) {  \
>> +*DST = InterlockedOr((int32_t volatile *) SRC, 0);\
>> +} else {  \
>> +*DST = *SRC;   

[ovs-dev] [PATCH v2] ovs-atomics: Add atomic support Windows.

2014-09-03 Thread Gurucharan Shetty
Before this change (i.e., with pthread locks for atomics on Windows),
the benchmark for cmap and hmap was as follows:

$ ./tests/ovstest.exe test-cmap benchmark 1000 3 1
Benchmarking with n=1000, 3 threads, 1.00% mutations:
cmap insert:  61070 ms
cmap iterate:  2750 ms
cmap search:  14238 ms
cmap destroy:  8354 ms

hmap insert:   1701 ms
hmap iterate:   985 ms
hmap search:   3755 ms
hmap destroy:  1052 ms

After this change, the benchmark is as follows:
$ ./tests/ovstest.exe test-cmap benchmark 1000 3 1
Benchmarking with n=1000, 3 threads, 1.00% mutations:
cmap insert:   3666 ms
cmap iterate:   365 ms
cmap search:   2016 ms
cmap destroy:  1331 ms

hmap insert:   1495 ms
hmap iterate:  1026 ms
hmap search:   4167 ms
hmap destroy:  1046 ms

So there is clearly a big improvement for cmap.

But the corresponding test on Linux (with gcc 4.6) yields the following:

./tests/ovstest test-cmap benchmark 1000 3 1
Benchmarking with n=1000, 3 threads, 1.00% mutations:
cmap insert:   3917 ms
cmap iterate:   355 ms
cmap search:871 ms
cmap destroy:  1158 ms

hmap insert:   1988 ms
hmap iterate:  1005 ms
hmap search:   5428 ms
hmap destroy:   980 ms

So for this particular test, except for "cmap search", Windows and
Linux have similar performance. Windows is around 2.5x slower in "cmap search"
compared to Linux. This has to be investigated.

Signed-off-by: Gurucharan Shetty 
---
 lib/automake.mk   |1 +
 lib/ovs-atomic-msvc.h |  398 +
 lib/ovs-atomic.h  |2 +
 3 files changed, 401 insertions(+)
 create mode 100644 lib/ovs-atomic-msvc.h

diff --git a/lib/automake.mk b/lib/automake.mk
index d46613f..f371ee0 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -152,6 +152,7 @@ lib_libopenvswitch_la_SOURCES = \
lib/ovs-atomic-i586.h \
lib/ovs-atomic-locked.c \
lib/ovs-atomic-locked.h \
+   lib/ovs-atomic-msvc.h \
lib/ovs-atomic-pthreads.h \
lib/ovs-atomic-x86_64.h \
lib/ovs-atomic.h \
diff --git a/lib/ovs-atomic-msvc.h b/lib/ovs-atomic-msvc.h
new file mode 100644
index 000..b1699d8
--- /dev/null
+++ b/lib/ovs-atomic-msvc.h
@@ -0,0 +1,398 @@
+/*
+ * Copyright (c) 2014 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* This header implements atomic operation primitives for MSVC
+ * on i586 or greater platforms (32 bit). */
+#ifndef IN_OVS_ATOMIC_H
+#error "This header should only be included indirectly via ovs-atomic.h."
+#endif
+
+/* From msdn documentation: With Visual Studio 2003, volatile to volatile
+ * references are ordered; the compiler will not re-order volatile variable
+ * access. With Visual Studio 2005, the compiler also uses acquire semantics
+ * for read operations on volatile variables and release semantics for write
+ * operations on volatile variables (when supported by the CPU).
+ *
+ * Though there is no clear documentation that states that anything greater
+ * than VS 2005 has the same behavior as described above, looking through MSVCs
+ * C++ atomics library in VS2013 shows that the compiler still takes
+ * acquire/release semantics on volatile variables. */
+#define ATOMIC(TYPE) TYPE volatile
+
+typedef enum {
+memory_order_relaxed,
+memory_order_consume,
+memory_order_acquire,
+memory_order_release,
+memory_order_acq_rel,
+memory_order_seq_cst
+} memory_order;
+
+#define ATOMIC_BOOL_LOCK_FREE 2
+#define ATOMIC_CHAR_LOCK_FREE 2
+#define ATOMIC_SHORT_LOCK_FREE 2
+#define ATOMIC_INT_LOCK_FREE 2
+#define ATOMIC_LONG_LOCK_FREE 2
+#define ATOMIC_LLONG_LOCK_FREE 2
+#define ATOMIC_POINTER_LOCK_FREE 2
+
+#define IS_LOCKLESS_ATOMIC(OBJECT)  \
+(sizeof(OBJECT) <= 8 && IS_POW2(sizeof(OBJECT)))
+
+#define ATOMIC_VAR_INIT(VALUE) (VALUE)
+#define atomic_init(OBJECT, VALUE) (*(OBJECT) = (VALUE), (void) 0)
+
+static inline void
+atomic_compiler_barrier(memory_order order)
+{
+/* In case of 'memory_order_consume', it is implicitly assumed that
+ * the compiler will not move instructions that have data-dependency
+ * on the variable in question before the barrier. */
+if (order > memory_order_consume) {
+_ReadWriteBarrier();
+}
+}
+
+static inline void
+atomic_thread_fence(memory_order order)
+{
+/* x86 is strongly ordered and acquire/release semantics come
+ * automatically. */
+atomic_compiler_barrier(order);
+if (order == memory_order_seq_cst) {
+  

Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread Jamal Hadi Salim

On 09/03/14 11:20, John Fastabend wrote:


Also I have some filters that can match on offset/length/mask
tuples. As far as I can tell this is going to have to be yet
another interface? Or would it be worth the effort to define
the flow key more generically. My initial guess is I'll just
write a separate interface. I think this is what Jamal referred
to as another "classifier".



Exactly. I have more complex classifiers as stated earlier.
I am afraid these patches again are not satisfying that need.

In any case - we are taking a different tack than these patches
do and hopefully at some point we can merge thoughts.

cheers,
jamal

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread Jamal Hadi Salim

On 09/03/14 14:41, Pravin Shelar wrote:

On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko  wrote:



HW offload API should be separate from OVS module.


The above part I agree with. In fact, it is very odd that it seems
hard to get this point across ;->


This has following
advantages.
1. It can be managed by OVS userspace vswitchd process which has much
better context to setup hardware flow table. Once we add capabilities
for swdev, it is much easier for vswitchd process to choose
correct (hw or sw) flow table for given flow.


This i disagree with.
The desire is to have existing user tools to work with offloads.
When necessary, we then create new tools.
Existing tools may need to be taught to selectively do
hardware vs software offload. We have a precedent with
bridging code which selectively offloads to hardware using iproute2.


2. Other application that wants to use HW offload does not have
dependency on OVS kernel module.


Or on OF for that matter.


3. Hardware and software datapath remains separate, these two
components has no dependency on each other, both can be developed
independent of each other.



The basic definition of "offload" implies dependency;-> So,
I strongly disagree. You may need to go backwards and look at
views expressed on this (other than emails - there's slideware).

cheers,
jamal



___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 2:22 PM, Jamal Hadi Salim  wrote:
> On 09/03/14 14:41, Pravin Shelar wrote:
>>
>> On Wed, Sep 3, 2014 at 2:24 AM, Jiri Pirko  wrote:
>
>
>> HW offload API should be separate from OVS module.
>
>
> The above part i agree with. Infact it is very odd that it seems
> hard to get this point across ;->
>
>
>> This has following
>> advantages.
>> 1. It can be managed by OVS userspace vswitchd process which has much
>> better context to setup hardware flow table. Once we add capabilities
>> for swdev, it is much more easier for vswitchd process to choose
>> correct (hw or sw) flow table for given flow.
>
>
> This i disagree with.
> The desire is to have existing user tools to work with offloads.
> When necessary, we then create new tools.
> Existing tools may need to be taught to do selectively do
> hardware vs software offload. We have a precedence with
> bridging code which selectively offloads to hardware using iproute2.
>
Both of us are saying same thing.
What I meant was for OVS use-case, where OVS wants to use offload for
switching flows, vswitchd userspace process can program HW offload
using kernel HW offload APIs directly from userspace, rather than
going through OVS kernel module. If user wants to use some other tool,
then the tool can use same kernel HW offload APIs.

>
>> 2. Other application that wants to use HW offload does not have
>> dependency on OVS kernel module.
>
>
> Or on OF for that matter.
>
>
>> 3. Hardware and software datapath remains separate, these two
>> components has no dependency on each other, both can be developed
>> independent of each other.
>>
>
> The basic definition of "offload" implies dependency;-> So,
> I strongly disagree. You may need to go backwards and look at
> views expressed on this (other than emails - theres slideware).
>

I was referring to code dependency in kernel. For example ovs flow-key
structure used. This complicates OVS internal structure which needs to
be shared plus OVS might need to extend interface for configuring HW
match or action that does not exist in OVS software datapath.

I agree these two components are related and that dependency can be
handled from userspace.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [Patch] Documentation for DPDK IVSHMEM VM Communications

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 9:31 AM, Polehn, Mike A  wrote:
> The setup for packet transfer between the switch and VM by shared memory 
> (IVSHMEM) is moderately complex and most details are not easily found.  Also 
> this is a different transfer method than user side vhost which copies between 
> the separate memory spaces at a cost of slower packet rate or higher CPU core 
> load(s). Shared memory transfer is a much more efficient transfer method since 
> it is only copying packet pointers and not packet data. However, it lacks 
> security since the VM can see all the packet buffer memory space at all 
> times. However efficiency vs security is something only the user of the 
> system can determine since they know if the system is a closed environment or 
> not.
>
> I put in enough details to allow someone, who has not been intimately 
> involved in doing IVSHMEM packet processing work to setup and get the shared 
> memory transfer working in a short time given today's current build state. 
> Including the system packages to allow a proper build (qemu in particular) 
> maybe over kill, but figuring out these required packages can be very time 
> consuming. Unless it is working, you cannot experiment with or test the 
> IVSHMEM shared memory operation or even move the method forward as an 
> alternative setup, the correct information is just not readily available, 
> weeks can easily be spent (only if you are very determined to get it to work).
>
> The INSTALL.DPDK also needs to be updated for DPDK 1.7 ...
>
I am not sure about updates, can you send patch?

> Would you like to have this put in as separate doc, INSTALL.DPDK.IVSHMEM?
>
ok, Keeping it in separate file sounds fine.

> Mike
>
> -Original Message-
> From: Pravin Shelar [mailto:pshe...@nicira.com]
> Sent: Friday, August 29, 2014 3:54 PM
> To: Polehn, Mike A
> Cc: d...@openvswitch.com
> Subject: Re: [ovs-dev] [Patch] Documentation for DPDK IVSHMEM VM 
> Communications
>
> On Fri, Aug 15, 2014 at 7:07 AM, Polehn, Mike A  
> wrote:
>> Adds documentation on how to run IVSHMEM communication through VM.
>>
> I think INSTALL.DPDK is getting rather large and hard to understand with all 
> details.
> so I dropped "Alternative method to get QEMU, download and build from OVDK" 
> section.
> We can add this documentation to separate file once vhost support is added.
>
> Thanks.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [Patch] Documentation for DPDK IVSHMEM VM Communications

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 10:14 AM, Flavio Leitner  wrote:
> On Fri, Aug 29, 2014 at 03:54:08PM -0700, Pravin Shelar wrote:
>> On Fri, Aug 15, 2014 at 7:07 AM, Polehn, Mike A  
>> wrote:
>> > Adds documentation on how to run IVSHMEM communication through VM.
>> >
>> I think INSTALL.DPDK is getting rather large and hard to understand
>> with all details.
>> so I dropped "Alternative method to get QEMU, download and build from
>> OVDK" section.
>> We can add this documentation to separate file once vhost support is added.
>
> It's better to have extra info than no info at all.
>
I agree, I just wanted to organize it better.

> Since all these features are recent, it's hard to find any info out
> there, so please keep it.
>
> fbl
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Introduce port_try_ref() to prevent a race.

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 1:24 PM, Alex Wang  wrote:
> After discussing with Pravin offline, it is good to use first get the
> reference
> and then access the 'port->netdev' for pmd check.
>
> So, use a nested if statement:
>
> @@ -1864,20 +1874,27 @@ pmd_load_queues(struct pmd_thread *f,
>  index = 0;
>
>  CMAP_FOR_EACH (port, node, &f->dp->ports) {
> -if (netdev_is_pmd(port->netdev)) {
> -int i;
> -
> -for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
> -if ((index % dp->n_pmd_threads) == id) {
> -poll_list = xrealloc(poll_list, sizeof *poll
> -
> -port_ref(port);
> -poll_list[poll_cnt].port = port;
> -poll_list[poll_cnt].rx = port->rxq[i];
> -poll_cnt++;
> +/* Calls port_try_ref() to prevent the main thread
> + * from deleting the port. */
> +if (port_try_ref(port)) {
> +if (netdev_is_pmd(port->netdev)) {
> +int i;
> +
> +for (i = 0; i < netdev_n_rxq(port->netdev); i++)
> +if ((index % dp->n_pmd_threads) == id) {
> +poll_list = xrealloc(poll_list,
> +sizeof *poll_list * (pol
> +
> +port_ref(port);
> +poll_list[poll_cnt].port = port;
> +poll_list[poll_cnt].rx = port->rxq[i];
> +poll_cnt++;
> +}
> +index++;
>  }
> -index++;
>  }
> +/* Unrefs the port_try_ref(). */
> +port_unref(port);
>  }
>
> Will apply to master soon,

Thanks.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [v5 2/2] datapath: Implement recirc action without recursion

2014-09-03 Thread Pravin Shelar
On Wed, Sep 3, 2014 at 1:05 PM, Andy Zhou  wrote:
> Since the kernel stack is limited in size, it is not wise to use
> recursive functions with large stack frames.
>
> This patch provides an alternative implementation of recirc action
> without using recursion.
>
> A per CPU fixed sized, 'deferred action FIFO', is used to store either
> recirc or sample actions encountered during execution of an action
> list. Not executing recirc or sample actions in place, but rather executing
> them later as 'deferred actions', avoids recursion.
>
> Deferred actions are only executed after all other actions have been
> executed, including the ones triggered by loopback from the kernel
> network stack.
>
> The size of the private FIFO, currently set to 20, limits the number
> of total 'deferred actions' any one packet can accumulate.
>
> Signed-off-by: Andy Zhou 
>
> ---
> v4->v5:
> Reset fifo after processing deferred actions
> move private data structures from actions.h to actions.c
> remove action_fifo init functions, since default percpu data
>will be zero.

This looks pretty close.

> ---
>  datapath/Modules.mk |   1 +
>  datapath/actions.c  | 175 
> 
>  datapath/actions.h  |  31 ++
>  datapath/datapath.c |   1 +
>  datapath/datapath.h |   4 +-
>  5 files changed, 197 insertions(+), 15 deletions(-)
>  create mode 100644 datapath/actions.h
>
> diff --git a/datapath/Modules.mk b/datapath/Modules.mk
> index 90e158c..2e74f6e 100644
> --- a/datapath/Modules.mk
> +++ b/datapath/Modules.mk
> @@ -23,6 +23,7 @@ openvswitch_sources = \
>
>  openvswitch_headers = \
> compat.h \
> +   actions.h \
> datapath.h \
> flow.h \
> flow_netlink.h \
> diff --git a/datapath/actions.c b/datapath/actions.c
> index 0a22e55..6ad5bbe 100644
> --- a/datapath/actions.c
> +++ b/datapath/actions.c
> @@ -39,6 +39,74 @@
>  #include "mpls.h"
>  #include "vlan.h"
>  #include "vport.h"
> +#include "actions.h"
> +
> +struct ovs_deferred_action {
> +   struct sk_buff *skb;
> +   const struct nlattr *actions;
> +
> +   /* Store pkt_key clone when creating deferred action. */
> +   struct sw_flow_key pkt_key;
> +};
> +
> +#define OVS_DEFERRED_ACTION_FIFO_SIZE 20
> +struct ovs_action_fifo {
> +   int head;
> +   int tail;
> +   /* Deferred action fifo queue storage. */
> +   struct ovs_deferred_action fifo[OVS_DEFERRED_ACTION_FIFO_SIZE];
> +};
> +
> +static DEFINE_PER_CPU(struct ovs_action_fifo, action_fifos);
> +#define OVS_EXEC_ACTIONS_COUNT_LIMIT 4   /* limit used to detect packet
> +   looping by the network stack */
> +static DEFINE_PER_CPU(int, ovs_exec_actions_count);
> +
need better name.

> +static inline void action_fifo_init(struct ovs_action_fifo *fifo)
> +{
> +   fifo->head = 0;
> +   fifo->tail = 0;
> +}
> +
> +static inline bool action_fifo_is_empty(struct ovs_action_fifo *fifo)
> +{
> +   return (fifo->head == fifo->tail);
> +}
> +
> +static inline struct ovs_deferred_action *
> +action_fifo_get(struct ovs_action_fifo *fifo)
> +{
> +   if (action_fifo_is_empty(fifo))
> +   return NULL;
> +
> +   return &fifo->fifo[fifo->tail++];
> +}
> +
> +static inline struct ovs_deferred_action *
> +action_fifo_put(struct ovs_action_fifo *fifo)
> +{
> +   if (fifo->head >= OVS_DEFERRED_ACTION_FIFO_SIZE - 1)
> +   return NULL;
> +
> +   return &fifo->fifo[fifo->head++];
> +}
> +
> +static inline struct ovs_deferred_action *
> +add_deferred_actions(struct sk_buff *skb, const struct nlattr *attr)
> +{
> +   struct ovs_action_fifo *fifo;
> +   struct ovs_deferred_action *da;
> +
> +   fifo = this_cpu_ptr(&(action_fifos));
> +   da = action_fifo_put(fifo);
> +
> +   if (da) {
> +   da->skb = skb;
> +   da->actions = attr;
> +   }
> +
> +   return da;
> +}
>
There is no need to inline any symbols in .c file, it hides compiler
warnings of unused symbols.

>  static void flow_key_clone(struct sk_buff *skb, struct sw_flow_key *new_key)
>  {
> @@ -689,9 +757,9 @@ static bool last_action(const struct nlattr *a, int rem)
>  static int sample(struct datapath *dp, struct sk_buff *skb,
>   const struct nlattr *attr)
>  {
> -   struct sw_flow_key sample_key;
> const struct nlattr *acts_list = NULL;
> const struct nlattr *a;
> +   struct ovs_deferred_action *da;
> int rem;
>
> for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
> @@ -728,10 +796,19 @@ static int sample(struct datapath *dp, struct sk_buff 
> *skb,
> /* Skip the sample action when out of memory. */
> return 0;
>
> -   flow_key_clone(skb, &sample_key);
> +   da = add_deferred_actions(skb, a);
> +   if (!da) {
> +   if (net_ratelimit())
> +   pr_warn("%s: deferred actions limit re

Re: [ovs-dev] [patch net-next 07/13] dsa: implement ndo_swdev_get_id

2014-09-03 Thread Florian Fainelli
On 09/03/2014 02:24 AM, Jiri Pirko wrote:
> Signed-off-by: Jiri Pirko 
> ---
>  include/linux/netdevice.h |  3 ++-
>  include/net/dsa.h |  1 +
>  net/dsa/Kconfig   |  2 +-
>  net/dsa/dsa.c |  3 +++
>  net/dsa/slave.c   | 10 ++
>  5 files changed, 17 insertions(+), 2 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 6a009d1..7ee070f 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -41,7 +41,6 @@
>  
>  #include 
>  #include 
> -#include 
>  #ifdef CONFIG_DCB
>  #include 
>  #endif
> @@ -1259,6 +1258,8 @@ enum netdev_priv_flags {
>  #define IFF_LIVE_ADDR_CHANGE IFF_LIVE_ADDR_CHANGE
>  #define IFF_MACVLAN  IFF_MACVLAN
>  
> +#include 
> +
>  /**
>   *   struct net_device - The DEVICE structure.
>   *   Actually, this whole structure is a big mistake.  It mixes I/O
> diff --git a/include/net/dsa.h b/include/net/dsa.h
> index 9771292..d60cd42 100644
> --- a/include/net/dsa.h
> +++ b/include/net/dsa.h
> @@ -140,6 +140,7 @@ struct dsa_switch {
>   u32 phys_mii_mask;
>   struct mii_bus  *slave_mii_bus;
>   struct net_device   *ports[DSA_MAX_PORTS];
> + struct netdev_phys_item_id psid;
>  };
>  
>  static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
> diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
> index a585fd6..4e144a2 100644
> --- a/net/dsa/Kconfig
> +++ b/net/dsa/Kconfig
> @@ -1,6 +1,6 @@
>  config HAVE_NET_DSA
>   def_bool y
> - depends on NETDEVICES && !S390
> + depends on NETDEVICES && NET_SWITCHDEV && !S390

It does not look like this is necessary, we are only using definitions
from net/dsa.h and include/linux/netdevice.h, and if it was, a 'select'
would be more appropriate here I think.

TBH, I think we should rather drop this patch for now, I do not see any
benefit in providing a random id over no-id at all.

>  
>  # Drivers must select NET_DSA and the appropriate tagging format
>  
> diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
> index 61f145c..374912d 100644
> --- a/net/dsa/dsa.c
> +++ b/net/dsa/dsa.c
> @@ -202,6 +202,9 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index,
>   ds->ports[i] = slave_dev;
>   }
>  
> + ds->psid.id_len = MAX_PHYS_ITEM_ID_LEN;
> + get_random_bytes(ds->psid.id, ds->psid.id_len);
> +
>   return ds;
>  
>  out_free:
> diff --git a/net/dsa/slave.c b/net/dsa/slave.c
> index 7333a4a..d79a6c7 100644
> --- a/net/dsa/slave.c
> +++ b/net/dsa/slave.c
> @@ -192,6 +192,15 @@ static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff 
> *skb,
>   return NETDEV_TX_OK;
>  }
>  
> +static int dsa_slave_swdev_get_id(struct net_device *dev,
> +   struct netdev_phys_item_id *psid)
> +{
> + struct dsa_slave_priv *p = netdev_priv(dev);
> + struct dsa_switch *ds = p->parent;
> +
> + memcpy(psid, &ds->psid, sizeof(*psid));
> + return 0;
> +}
>  
>  /* ethtool operations 
> ***/
>  static int
> @@ -323,6 +332,7 @@ static const struct net_device_ops dsa_slave_netdev_ops = 
> {
>   .ndo_set_rx_mode= dsa_slave_set_rx_mode,
>   .ndo_set_mac_address= dsa_slave_set_mac_address,
>   .ndo_do_ioctl   = dsa_slave_ioctl,
> + .ndo_swdev_get_id   = dsa_slave_swdev_get_id,
>  };
>  
>  static const struct dsa_device_ops notag_netdev_ops = {
> 

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 0/6] datapath-windows: Netlink put apis.

2014-09-03 Thread Ankur Sharma
This series contains changes for providing Netlink PUT Apis in
windows kernel.

Ankur Sharma (6):
  datapath-windows/Netlink: Move netlink files to a new directory.
  NetlinkBuf.c: Netlink buffer mgmt apis.
  NetlinkProto.h: Minor fix for typos and new macro for padding.
  OvsTypes.h : Added support for BE16
  Netlink.c: Added netlink put APIs.
  NetlinkBuf.c: Minor fix for lines exceeding 79 chars

 datapath-windows/automake.mk   |   8 +-
 datapath-windows/include/OvsPub.h  |   2 +-
 datapath-windows/ovsext/Netlink.c  | 469 ---
 datapath-windows/ovsext/Netlink.h  | 104 ---
 datapath-windows/ovsext/Netlink/Netlink.c  | 864 +
 datapath-windows/ovsext/Netlink/Netlink.h  | 132 
 datapath-windows/ovsext/Netlink/NetlinkBuf.c   | 352 +
 datapath-windows/ovsext/Netlink/NetlinkBuf.h   |  41 +
 .../ovsext/{ => Netlink}/NetlinkProto.h|   7 +-
 datapath-windows/ovsext/Types.h|   1 +
 datapath-windows/ovsext/ovsext.vcxproj |  10 +-
 datapath-windows/ovsext/precomp.h  |   4 +-
 12 files changed, 1409 insertions(+), 585 deletions(-)
 delete mode 100644 datapath-windows/ovsext/Netlink.c
 delete mode 100644 datapath-windows/ovsext/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.c
 create mode 100644 datapath-windows/ovsext/Netlink/Netlink.h
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.c
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.h
 rename datapath-windows/ovsext/{ => Netlink}/NetlinkProto.h (94%)

-- 
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 1/6] datapath-windows/Netlink: Move netlink files to a new directory.

2014-09-03 Thread Ankur Sharma
In this change we have created a new directory named Netlink
inside datapath-windows/ovsext/. This directory will be used to
keep all the netlink related files.

The reason we have created new directory is that for 'put' related
APIs we will be adding netlink buffer mgmt files as well. These files
will take the count of netlink related files to 5. Hence
we decided to club the netlink files in a single directory.

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/automake.mk | 6 +++---
 datapath-windows/include/OvsPub.h| 2 +-
 datapath-windows/ovsext/{ => Netlink}/Netlink.c  | 0
 datapath-windows/ovsext/{ => Netlink}/Netlink.h  | 0
 datapath-windows/ovsext/{ => Netlink}/NetlinkProto.h | 2 +-
 datapath-windows/ovsext/ovsext.vcxproj   | 8 
 datapath-windows/ovsext/precomp.h| 4 ++--
 7 files changed, 11 insertions(+), 11 deletions(-)
 rename datapath-windows/ovsext/{ => Netlink}/Netlink.c (100%)
 rename datapath-windows/ovsext/{ => Netlink}/Netlink.h (100%)
 rename datapath-windows/ovsext/{ => Netlink}/NetlinkProto.h (99%)

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index eb59274..5ea0197 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -5,9 +5,6 @@ EXTRA_DIST += \
datapath-windows/Package/package.VcxProj.user \
datapath-windows/include/OvsDpInterfaceExt.h \
datapath-windows/include/OvsPub.h\
-   datapath-windows/ovsext/Netlink.c \
-   datapath-windows/ovsext/Netlink.h \
-   datapath-windows/ovsext/NetlinkProto.h \
datapath-windows/misc/install.cmd \
datapath-windows/misc/uninstall.cmd \
datapath-windows/ovsext.sln \
@@ -33,6 +30,9 @@ EXTRA_DIST += \
datapath-windows/ovsext/IpHelper.h \
datapath-windows/ovsext/Jhash.c \
datapath-windows/ovsext/Jhash.h \
+   datapath-windows/ovsext/Netlink/Netlink.c \
+   datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
datapath-windows/ovsext/Oid.h \
diff --git a/datapath-windows/include/OvsPub.h 
b/datapath-windows/include/OvsPub.h
index 0446309..36814c4 100644
--- a/datapath-windows/include/OvsPub.h
+++ b/datapath-windows/include/OvsPub.h
@@ -17,7 +17,7 @@
 #ifndef __OVS_PUB_H_
 #define __OVS_PUB_H_ 1
 
-#include "../ovsext/Netlink.h"
+#include "../ovsext/Netlink/Netlink.h"
 
 #define OVS_DRIVER_MAJOR_VER 1
 #define OVS_DRIVER_MINOR_VER 0
diff --git a/datapath-windows/ovsext/Netlink.c 
b/datapath-windows/ovsext/Netlink/Netlink.c
similarity index 100%
rename from datapath-windows/ovsext/Netlink.c
rename to datapath-windows/ovsext/Netlink/Netlink.c
diff --git a/datapath-windows/ovsext/Netlink.h 
b/datapath-windows/ovsext/Netlink/Netlink.h
similarity index 100%
rename from datapath-windows/ovsext/Netlink.h
rename to datapath-windows/ovsext/Netlink/Netlink.h
diff --git a/datapath-windows/ovsext/NetlinkProto.h 
b/datapath-windows/ovsext/Netlink/NetlinkProto.h
similarity index 99%
rename from datapath-windows/ovsext/NetlinkProto.h
rename to datapath-windows/ovsext/Netlink/NetlinkProto.h
index 399b286..898cc84 100644
--- a/datapath-windows/ovsext/NetlinkProto.h
+++ b/datapath-windows/ovsext/Netlink/NetlinkProto.h
@@ -113,4 +113,4 @@ BUILD_ASSERT_DECL(sizeof(NL_ATTR) == 4);
 #define OVS_HDRLEN NLMSG_ALIGN(sizeof(OVS_HDR))
 #define NLA_HDRLEN ((INT) NLA_ALIGN(sizeof(NL_ATTR)))
 
-#endif /* __NETLINK_PROTO_H_ */
+#endif /* NetlinProto.h */
diff --git a/datapath-windows/ovsext/ovsext.vcxproj 
b/datapath-windows/ovsext/ovsext.vcxproj
index 82ab908..a5464d4 100644
--- a/datapath-windows/ovsext/ovsext.vcxproj
+++ b/datapath-windows/ovsext/ovsext.vcxproj
@@ -82,8 +82,8 @@
 
 
 
-
-
+
+
 
 
 
@@ -138,7 +138,7 @@
 
 
 
-
+
 
 
 
@@ -168,4 +168,4 @@
 
   
   
-
\ No newline at end of file
+
diff --git a/datapath-windows/ovsext/precomp.h 
b/datapath-windows/ovsext/precomp.h
index 41e3218..765075a 100644
--- a/datapath-windows/ovsext/precomp.h
+++ b/datapath-windows/ovsext/precomp.h
@@ -24,8 +24,8 @@
 #include "Types.h"
 #include "..\include\OvsPub.h"
 #include "Util.h"
-#include "Netlink.h"
-#include "NetlinkProto.h"
+#include "Netlink/Netlink.h"
+#include "Netlink/NetlinkProto.h"
 /*
  * Include openvswitch.h from userspace. Changing the location the file from
  * include/linux is pending discussion.
-- 
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 4/6] OvsTypes.h : Added support for BE16

2014-09-03 Thread Ankur Sharma
Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/ovsext/Types.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/datapath-windows/ovsext/Types.h b/datapath-windows/ovsext/Types.h
index e48df7a..b2ef48c 100644
--- a/datapath-windows/ovsext/Types.h
+++ b/datapath-windows/ovsext/Types.h
@@ -31,6 +31,7 @@ typedef uint8 __u8;
 
 /* Defines the  userspace specific data types for file
  * included within kernel only. */
+typedef UINT16 BE16;
 typedef UINT32 BE32;
 typedef UINT64 BE64;
 
-- 
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 3/6] NetlinkProto.h: Minor fix for typos and new macro for padding.

2014-09-03 Thread Ankur Sharma
Added a new macro for calculating the number of bytes required
for padding. Fixed a minor typo.

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/ovsext/Netlink/NetlinkProto.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/datapath-windows/ovsext/Netlink/NetlinkProto.h 
b/datapath-windows/ovsext/Netlink/NetlinkProto.h
index 898cc84..2c438a6 100644
--- a/datapath-windows/ovsext/Netlink/NetlinkProto.h
+++ b/datapath-windows/ovsext/Netlink/NetlinkProto.h
@@ -39,6 +39,9 @@
 /* Returns X rounded up to the nearest multiple of Y. */
 #define ROUND_UP(X, Y) (DIV_ROUND_UP(X, Y) * (Y))
 
+/* Returns the least number that, when added to X, yields a multiple of Y. */
+#define PAD_SIZE(X, Y) (ROUND_UP(X, Y) - (X))
+
 /* Netlink message */
 
 /* nlmsg_flags bits. */
@@ -92,7 +95,7 @@ typedef struct _GENL_MSG_HDR {
 UINT8 cmd;
 UINT8 version;
 UINT16 reserved;
-} GENL_MSG_HDR, *PGENL_MDG_HDR;
+} GENL_MSG_HDR, *PGENL_MSG_HDR;
 BUILD_ASSERT_DECL(sizeof(GENL_MSG_HDR) == 4);
 
 /* Netlink attributes */
-- 
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 6/6] NetlinkBuf.c: Minor fix for lines exceeding 79 chars

2014-09-03 Thread Ankur Sharma
Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/ovsext/Netlink/NetlinkBuf.c | 9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/datapath-windows/ovsext/Netlink/NetlinkBuf.c 
b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
index 590c803..ba10d83 100644
--- a/datapath-windows/ovsext/Netlink/NetlinkBuf.c
+++ b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
@@ -71,7 +71,8 @@
 static __inline VOID
 _NlBufCopyAtTailUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len);
 static __inline VOID
-_NlBufCopyAtOffsetUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len, UINT32 
offset);
+_NlBufCopyAtOffsetUnsafe(PNL_BUFFER nlBuf, PCHAR data,
+ UINT32 len, UINT32 offset);
 
 /*
  * --
@@ -134,7 +135,8 @@ done:
  * NlBufCopyAtHead --
  *
  *Copies the data to the head of the buffer.
- *It can be seen as special case of NlBufCopyAtOffset with input offset 
zero.
+ *It can be seen as special case of NlBufCopyAtOffset with input
+ *offset zero.
  * --
  */
 BOOLEAN
@@ -330,7 +332,8 @@ _NlBufCopyAtTailUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 
len)
  * --
  */
 static __inline VOID
-_NlBufCopyAtOffsetUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len, UINT32 
offset)
+_NlBufCopyAtOffsetUnsafe(PNL_BUFFER nlBuf, PCHAR data,
+ UINT32 len, UINT32 offset)
 {
 PCHAR dest = NULL;
 
-- 
1.9.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 2/6] NetlinkBuf.c: Netlink buffer mgmt apis.

2014-09-03 Thread Ankur Sharma
In this change we have introduced buffer mgmt apis which will be
used while creating netlink messages. The basic functionality provided
by apis is on similar lines to ofpbuf in userspace with an exception
that it will not do run time buffer reallocation.

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/automake.mk |   2 +
 datapath-windows/ovsext/Netlink/NetlinkBuf.c | 349 +++
 datapath-windows/ovsext/Netlink/NetlinkBuf.h |  41 
 datapath-windows/ovsext/ovsext.vcxproj   |   2 +
 4 files changed, 394 insertions(+)
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.c
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 5ea0197..297a809 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -32,6 +32,8 @@ EXTRA_DIST += \
datapath-windows/ovsext/Jhash.h \
datapath-windows/ovsext/Netlink/Netlink.c \
datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.c \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.h \
datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
diff --git a/datapath-windows/ovsext/Netlink/NetlinkBuf.c 
b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
new file mode 100644
index 000..590c803
--- /dev/null
+++ b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ==
+ * This is a simple buffer mangement framework specific for netlink protocol.
+ * The name could be confused with ovsext/BufferMgmt.c. Ovsext/BufferMgmt.c
+ * deals with buffer mgmt for NBLs. Where as this framework deals with
+ * management of buffer that holds a netlink message.
+ *
+ * This framework provides APIs for putting/accessing data in a buffer. These
+ * APIs are used by driver's netlink protocol implementation.
+ *
+ * We can see this framework as a subset of ofpbuf in ovs userspace.
+ *
+ * This framework is NOT a generic buffer management framework (ofpbuf
+ * is a generic buffer mgmt framework) and provides only the functioanlities
+ * which would be useful for netlink protocol. Some of the key features are:
+ *
+ * a. It DOES NOT support automatic buffer reallocation.
+ *i. A netlink input/output message is a static buffer.
+ * b. The unused space is at the tail.
+ * c. There is no notion of headdroom.
+ * ==
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_NETLINK
+#include "Debug.h"
+#include "NetlinkBuf.h"
+
+/* Returns used space in the buffer */
+#define NL_BUF_USED_SPACE(_buf)   (_buf->bufLen -\
+   _buf->bufRemLen)
+
+/* Validates that offset is within buffer boundaries and will not
+ * create holes in the buffer.*/
+#define NL_BUF_IS_VALID_OFFSET(_buf, _offset) (_offset <=\
+   NL_BUF_TAIL_OFFSET(_buf) ? 
1 : 0)
+
+/* Validates if new data of size _size can be added at offset _offset.
+ * This macor assumes that offset validation has been done.*/
+#define NL_BUF_CAN_ADD(_buf, _size, _offset)  (((_offset + _size <=  \
+  _buf->bufLen) && (_size\
+  <= _buf->bufRemLen)) ? \
+  1 : 0)
+
+/* Returns the offset of tail wrt buffer head */
+#define NL_BUF_TAIL_OFFSET(_buf)  (_buf->tail - _buf->head)
+
+static __inline VOID
+_NlBufCopyAtTailUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len);
+static __inline VOID
+_NlBufCopyAtOffsetUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len, UINT32 
offset);
+
+/*
+ * --
+ * NlBufInit --
+ *
+ *Initializes NL_BUF with buffer pointer and length.
+ * -

[ovs-dev] [PATCH v2 5/6] Netlink.c: Added netlink put APIs.

2014-09-03 Thread Ankur Sharma
In this change we have added the APIs for putting
netlink headers, attributes in a buffer.

The buffer is managed through NetlinkBuf.[c|h].

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/ovsext/Netlink/Netlink.c | 403 +-
 datapath-windows/ovsext/Netlink/Netlink.h |  28 +++
 2 files changed, 427 insertions(+), 4 deletions(-)

diff --git a/datapath-windows/ovsext/Netlink/Netlink.c 
b/datapath-windows/ovsext/Netlink/Netlink.c
index 90a633b..99cdc0e 100644
--- a/datapath-windows/ovsext/Netlink/Netlink.c
+++ b/datapath-windows/ovsext/Netlink/Netlink.c
@@ -24,6 +24,402 @@
 #define OVS_DBG_MOD OVS_DBG_NETLINK
 #include "Debug.h"
 
+/* ==
+ * This file provides simple netlink get, put and validation APIs.
+ * Most of the code is on similar lines as userspace netlink implementation.
+ *
+ * TODO: Convert these methods to inline.
+ * ==
+ */
+
+/*
+ * ---
+ * Adds Netlink Header to the NL_BUF.
+ * ---
+ */
+BOOLEAN
+NlMsgPutNlHdr(PNL_BUFFER buf, PNL_MSG_HDR nlMsg)
+{
+if ((NlBufCopyAtOffset(buf, (PCHAR)nlMsg, NLMSG_HDRLEN, 0))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * ---
+ * Adds Genl Header to the NL_BUF.
+ * ---
+ */
+BOOLEAN
+NlMsgPutGenlHdr(PNL_BUFFER buf, PGENL_MSG_HDR genlMsg)
+{
+if ((NlBufCopyAtOffset(buf, (PCHAR)genlMsg, GENL_HDRLEN, NLMSG_HDRLEN))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * ---
+ * Adds OVS Header to the NL_BUF.
+ * ---
+ */
+BOOLEAN
+NlMsgPutOvsHdr(PNL_BUFFER buf, POVS_HDR ovsHdr)
+{
+if ((NlBufCopyAtOffset(buf, (PCHAR)ovsHdr, OVS_HDRLEN,
+   GENL_HDRLEN + NLMSG_HDRLEN))) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * ---
+ * Adds data of length 'len' to the tail end of NL_BUF.
+ * Refer nl_msg_put for more details.
+ * ---
+ */
+BOOLEAN
+NlMsgPutTail(PNL_BUFFER buf, const PCHAR data, UINT32 len)
+{
+len = NLMSG_ALIGN(len);
+if (NlBufCopyAtTail(buf, data, len)) {
+return TRUE;
+}
+
+return FALSE;
+}
+
+/*
+ * ---
+ * memsets length 'len' at tail end of NL_BUF.
+ * Refer nl_msg_put_uninit for more details.
+ * ---
+ */
+PCHAR
+NlMsgPutTailUninit(PNL_BUFFER buf, UINT32 len)
+{
+len = NLMSG_ALIGN(len);
+return NlBufCopyAtTailUninit(buf, len);
+}
+
+/*
+ * ---
+ * Adds an attribute to the tail end of buffer. It does
+ * not copy the attribute payload.
+ * Refer nl_msg_put_unspec_uninit for more details.
+ * ---
+ */
+PCHAR
+NlMsgPutTailUnspecUninit(PNL_BUFFER buf, UINT16 type, UINT16 len)
+{
+PCHAR ret = NULL;
+UINT16 totalLen = NLA_HDRLEN + len;
+PNL_ATTR nla = (PNL_ATTR)(NlMsgPutTailUninit(buf, totalLen));
+
+if (!nla) {
+goto done;
+}
+
+ret = (PCHAR)(nla + 1);
+nla->nlaLen = totalLen;
+nla->nlaType = type;
+
+done:
+return ret;
+}
+
+/*
+ * ---
+ * Adds an attribute to the tail end of buffer. It copies attribute
+ * payload as well.
+ * Refer nl_msg_put_unspec for more details.
+ * ---
+ */
+BOOLEAN
+NlMsgPutTailUnspec(PNL_BUFFER buf, UINT16 type, PCHAR data, UINT16 len)
+{
+BOOLEAN ret = TRUE;
+PCHAR nlaData = NlMsgPutTailUnspecUninit(buf, type, len);
+
+if (!nlaData) {
+ret = FALSE;
+goto done;
+}
+
+RtlCopyMemory(nlaData, data, len);
+
+done:
+return ret;
+}
+
+/*
+ * ---
+ * Adds an attribute of 'type' and no payload at the tail end of buffer.
+ * Refer nl_msg_put_flag for more details.
+ * ---
+ */
+BOOLEAN
+NlMsgPutTailFlag(PNL_BUFFER buf, UINT16 type)
+{
+BOOLEAN ret = TRUE;
+PCHAR nlaData = NlMsgPutTailUnspecUnin

[ovs-dev] [PATCH v2 2/6] NetlinkBuf.c: Netlink buffer mgmt apis.

2014-09-03 Thread Ankur Sharma
In this change we have introduced buffer mgmt apis which will be
used while creating netlink messages. The basic functionality provided
by apis is on similar lines to ofpbuf in userspace with an exception
that it will not do run time buffer reallocation.

Signed-off-by: Ankur Sharma 
Tested-by: Ankur Sharma 
Reported-at: https://github.com/openvswitch/ovs-issues/issues/37
Acked-by: Eitan Eliahu 
Acked-by: Alin Gabriel Serdean 

---
 datapath-windows/automake.mk |   2 +
 datapath-windows/ovsext/Netlink/NetlinkBuf.c | 349 +++
 datapath-windows/ovsext/Netlink/NetlinkBuf.h |  41 
 datapath-windows/ovsext/ovsext.vcxproj   |   2 +
 4 files changed, 394 insertions(+)
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.c
 create mode 100644 datapath-windows/ovsext/Netlink/NetlinkBuf.h

diff --git a/datapath-windows/automake.mk b/datapath-windows/automake.mk
index 5ea0197..297a809 100644
--- a/datapath-windows/automake.mk
+++ b/datapath-windows/automake.mk
@@ -32,6 +32,8 @@ EXTRA_DIST += \
datapath-windows/ovsext/Jhash.h \
datapath-windows/ovsext/Netlink/Netlink.c \
datapath-windows/ovsext/Netlink/Netlink.h \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.c \
+   datapath-windows/ovsext/Netlink/NetlinkBuf.h \
datapath-windows/ovsext/Netlink/NetlinkProto.h \
datapath-windows/ovsext/NetProto.h \
datapath-windows/ovsext/Oid.c \
diff --git a/datapath-windows/ovsext/Netlink/NetlinkBuf.c 
b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
new file mode 100644
index 000..590c803
--- /dev/null
+++ b/datapath-windows/ovsext/Netlink/NetlinkBuf.c
@@ -0,0 +1,349 @@
+/*
+ * Copyright (c) 2014 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ==
+ * This is a simple buffer mangement framework specific for netlink protocol.
+ * The name could be confused with ovsext/BufferMgmt.c. Ovsext/BufferMgmt.c
+ * deals with buffer mgmt for NBLs. Where as this framework deals with
+ * management of buffer that holds a netlink message.
+ *
+ * This framework provides APIs for putting/accessing data in a buffer. These
+ * APIs are used by driver's netlink protocol implementation.
+ *
+ * We can see this framework as a subset of ofpbuf in ovs userspace.
+ *
+ * This framework is NOT a generic buffer management framework (ofpbuf
+ * is a generic buffer mgmt framework) and provides only the functioanlities
+ * which would be useful for netlink protocol. Some of the key features are:
+ *
+ * a. It DOES NOT support automatic buffer reallocation.
+ *i. A netlink input/output message is a static buffer.
+ * b. The unused space is at the tail.
+ * c. There is no notion of headdroom.
+ * ==
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#ifdef OVS_DBG_MOD
+#undef OVS_DBG_MOD
+#endif
+#define OVS_DBG_MOD OVS_DBG_NETLINK
+#include "Debug.h"
+#include "NetlinkBuf.h"
+
+/* Returns used space in the buffer */
+#define NL_BUF_USED_SPACE(_buf)   (_buf->bufLen -\
+   _buf->bufRemLen)
+
+/* Validates that offset is within buffer boundaries and will not
+ * create holes in the buffer.*/
+#define NL_BUF_IS_VALID_OFFSET(_buf, _offset) (_offset <=\
+   NL_BUF_TAIL_OFFSET(_buf) ? 
1 : 0)
+
+/* Validates if new data of size _size can be added at offset _offset.
+ * This macor assumes that offset validation has been done.*/
+#define NL_BUF_CAN_ADD(_buf, _size, _offset)  (((_offset + _size <=  \
+  _buf->bufLen) && (_size\
+  <= _buf->bufRemLen)) ? \
+  1 : 0)
+
+/* Returns the offset of tail wrt buffer head */
+#define NL_BUF_TAIL_OFFSET(_buf)  (_buf->tail - _buf->head)
+
+static __inline VOID
+_NlBufCopyAtTailUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len);
+static __inline VOID
+_NlBufCopyAtOffsetUnsafe(PNL_BUFFER nlBuf, PCHAR data, UINT32 len, UINT32 
offset);
+
+/*
+ * --
+ * NlBufInit --
+ *
+ *Initializes NL_BUF with buffer pointer and length.
+ * -

[ovs-dev] [v5 2/2] datapath: Implement recirc action without recursion

2014-09-03 Thread Andy Zhou
Since the kernel stack is limited in size, it is not wise to use
recursive functions with large stack frames.

This patch provides an alternative implementation of recirc action
without using recursion.

A per-CPU, fixed-size 'deferred action FIFO' is used to store either
recirc or sample actions encountered during execution of an action
list. Executing recirc or sample actions later as 'deferred actions',
rather than in place, avoids recursion.

Deferred actions are only executed after all other actions have been
executed, including the ones triggered by loopback from the kernel
network stack.

The size of the private FIFO, currently set to 20, limits the number
of total 'deferred actions' any one packet can accumulate.

Signed-off-by: Andy Zhou 

---
v5->v6:
Remove ovs_ prefix for internal symbols.
Remove actions.h
Rename ovs_exec_actions_count to exec_actions_limit
Rename ovs_process_deferred_packets() to
process_deferred_actions()

v4->v5:
Reset fifo after processing deferred actions
move private data structures from actions.h to actions.c
remove action_fifo init functions, since default percpu data
   will be zero.
---
 datapath/actions.c | 173 +
 1 file changed, 161 insertions(+), 12 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 0a22e55..38aab64 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -40,6 +40,73 @@
 #include "vlan.h"
 #include "vport.h"
 
+struct deferred_action {
+   struct sk_buff *skb;
+   const struct nlattr *actions;
+
+   /* Store pkt_key clone when creating deferred action. */
+   struct sw_flow_key pkt_key;
+};
+
+#define DEFERRED_ACTION_FIFO_SIZE 20
+struct action_fifo {
+   int head;
+   int tail;
+   /* Deferred action fifo queue storage. */
+   struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE];
+};
+
+static DEFINE_PER_CPU(struct action_fifo, action_fifos);
+#define EXEC_ACTIONS_LEVEL_LIMIT 4   /* limit used to detect packet
+   looping by the network stack */
+static DEFINE_PER_CPU(int, exec_actions_level);
+
+static void action_fifo_init(struct action_fifo *fifo)
+{
+   fifo->head = 0;
+   fifo->tail = 0;
+}
+
+static bool action_fifo_is_empty(struct action_fifo *fifo)
+{
+   return (fifo->head == fifo->tail);
+}
+
+static struct deferred_action *
+action_fifo_get(struct action_fifo *fifo)
+{
+   if (action_fifo_is_empty(fifo))
+   return NULL;
+
+   return &fifo->fifo[fifo->tail++];
+}
+
+static struct deferred_action *
+action_fifo_put(struct action_fifo *fifo)
+{
+   if (fifo->head >= DEFERRED_ACTION_FIFO_SIZE - 1)
+   return NULL;
+
+   return &fifo->fifo[fifo->head++];
+}
+
+static inline struct deferred_action *
+add_deferred_actions(struct sk_buff *skb, const struct nlattr *attr)
+{
+   struct action_fifo *fifo;
+   struct deferred_action *da;
+
+   fifo = this_cpu_ptr(&(action_fifos));
+   da = action_fifo_put(fifo);
+
+   if (da) {
+   da->skb = skb;
+   da->actions = attr;
+   }
+
+   return da;
+}
+
 static void flow_key_clone(struct sk_buff *skb, struct sw_flow_key *new_key)
 {
*new_key = *OVS_CB(skb)->pkt_key;
@@ -689,9 +756,9 @@ static bool last_action(const struct nlattr *a, int rem)
 static int sample(struct datapath *dp, struct sk_buff *skb,
  const struct nlattr *attr)
 {
-   struct sw_flow_key sample_key;
const struct nlattr *acts_list = NULL;
const struct nlattr *a;
+   struct deferred_action *da;
int rem;
 
for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
@@ -728,10 +795,19 @@ static int sample(struct datapath *dp, struct sk_buff 
*skb,
/* Skip the sample action when out of memory. */
return 0;
 
-   flow_key_clone(skb, &sample_key);
+   da = add_deferred_actions(skb, a);
+   if (!da) {
+   if (net_ratelimit())
+   pr_warn("%s: deferred actions limit reached, dropping 
sample action\n",
+   ovs_dp_name(dp));
 
-   /* do_execute_actions() will consume the cloned skb. */
-   return do_execute_actions(dp, skb, a, rem);
+   kfree_skb(skb);
+   return 0;
+   }
+
+   flow_key_clone(skb, &da->pkt_key);
+
+   return 0;
 }
 
 static void execute_hash(struct sk_buff *skb, const struct nlattr *attr)
@@ -750,7 +826,7 @@ static void execute_hash(struct sk_buff *skb, const struct 
nlattr *attr)
 }
 
 static int execute_set_action(struct sk_buff *skb,
-const struct nlattr *nested_attr)
+ const struct nlattr *nested_attr)
 {
int err = 0;
 
@@ -801,11 +877,10 @@ static int execute_set_action(struct sk_buff *skb,
return err;
 }
 
-
 st

[ovs-dev] [v5 1/2] datapath: Remove recirc stack depth limit check

2014-09-03 Thread Andy Zhou
Future patches will change the recirc action implementation to not
use recursion. The stack depth detection is no longer necessary.

Signed-off-by: Andy Zhou 
Acked-by: Pravin B Shelar 
---
 datapath/actions.c  | 63 -
 datapath/datapath.c |  6 ++---
 datapath/datapath.h |  4 ++--
 datapath/vport.c|  2 +-
 4 files changed, 10 insertions(+), 65 deletions(-)

diff --git a/datapath/actions.c b/datapath/actions.c
index 43ca2a0..0a22e55 100644
--- a/datapath/actions.c
+++ b/datapath/actions.c
@@ -831,7 +831,7 @@ static int execute_recirc(struct datapath *dp, struct 
sk_buff *skb,
}
 
flow_key_set_recirc_id(skb, nla_get_u32(a));
-   ovs_dp_process_packet(skb, true);
+   ovs_dp_process_packet(skb);
return 0;
 }
 
@@ -924,63 +924,8 @@ static int do_execute_actions(struct datapath *dp, struct 
sk_buff *skb,
return 0;
 }
 
-/* We limit the number of times that we pass into execute_actions()
- * to avoid blowing out the stack in the event that we have a loop.
- *
- * Each loop adds some (estimated) cost to the kernel stack.
- * The loop terminates when the max cost is exceeded.
- * */
-#define RECIRC_STACK_COST 1
-#define DEFAULT_STACK_COST 4
-/* Allow up to 4 regular services, and up to 3 recirculations */
-#define MAX_STACK_COST (DEFAULT_STACK_COST * 4 + RECIRC_STACK_COST * 3)
-
-struct loop_counter {
-   u8 stack_cost;  /* loop stack cost. */
-   bool looping;   /* Loop detected? */
-};
-
-static DEFINE_PER_CPU(struct loop_counter, loop_counters);
-
-static int loop_suppress(struct datapath *dp, struct sw_flow_actions *actions)
-{
-   if (net_ratelimit())
-   pr_warn("%s: flow loop detected, dropping\n",
-   ovs_dp_name(dp));
-   actions->actions_len = 0;
-   return -ELOOP;
-}
-
 /* Execute a list of actions against 'skb'. */
-int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
-   struct sw_flow_actions *acts, bool recirc)
-{
-   const u8 stack_cost = recirc ? RECIRC_STACK_COST : DEFAULT_STACK_COST;
-   struct loop_counter *loop;
-   int error;
-
-   /* Check whether we've looped too much. */
-   loop = &__get_cpu_var(loop_counters);
-   loop->stack_cost += stack_cost;
-   if (unlikely(loop->stack_cost > MAX_STACK_COST))
-   loop->looping = true;
-   if (unlikely(loop->looping)) {
-   error = loop_suppress(dp, acts);
-   kfree_skb(skb);
-   goto out_loop;
-   }
-
-   error = do_execute_actions(dp, skb, acts->actions, acts->actions_len);
-
-   /* Check whether sub-actions looped too much. */
-   if (unlikely(loop->looping))
-   error = loop_suppress(dp, acts);
-
-out_loop:
-   /* Decrement loop stack cost. */
-   loop->stack_cost -= stack_cost;
-   if (!loop->stack_cost)
-   loop->looping = false;
-
-   return error;
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, struct 
sw_flow_actions *acts)
+{
+   return do_execute_actions(dp, skb, acts->actions, acts->actions_len);
 }
diff --git a/datapath/datapath.c b/datapath/datapath.c
index b6eadef..a668222 100644
--- a/datapath/datapath.c
+++ b/datapath/datapath.c
@@ -251,7 +251,7 @@ void ovs_dp_detach_port(struct vport *p)
 }
 
 /* Must be called with rcu_read_lock. */
-void ovs_dp_process_packet(struct sk_buff *skb, bool recirc)
+void ovs_dp_process_packet(struct sk_buff *skb)
 {
const struct vport *p = OVS_CB(skb)->input_vport;
struct sw_flow_key *pkt_key = OVS_CB(skb)->pkt_key;
@@ -283,7 +283,7 @@ void ovs_dp_process_packet(struct sk_buff *skb, bool recirc)
ovs_flow_stats_update(flow, pkt_key->tp.flags, skb);
 
sf_acts = rcu_dereference(flow->sf_acts);
-   ovs_execute_actions(dp, skb, sf_acts, recirc);
+   ovs_execute_actions(dp, skb, sf_acts);
stats_counter = &stats->n_hit;
 
 out:
@@ -581,7 +581,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, 
struct genl_info *info)
sf_acts = rcu_dereference(flow->sf_acts);
 
local_bh_disable();
-   err = ovs_execute_actions(dp, packet, sf_acts, false);
+   err = ovs_execute_actions(dp, packet, sf_acts);
local_bh_enable();
rcu_read_unlock();
 
diff --git a/datapath/datapath.h b/datapath/datapath.h
index e414225..eba2fc4 100644
--- a/datapath/datapath.h
+++ b/datapath/datapath.h
@@ -188,7 +188,7 @@ extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 extern struct genl_multicast_group ovs_dp_vport_multicast_group;
 
-void ovs_dp_process_packet(struct sk_buff *, bool recirc);
+void ovs_dp_process_packet(struct sk_buff *c);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
  const struct dp_upcall_info *);
@@ -198,7 +198,7 @@ struct sk_buff *ovs_vport_cmd_build_inf

Re: [ovs-dev] [RFC] Proposal for enhanced select groups

2014-09-03 Thread Simon Horman
On Tue, Sep 02, 2014 at 06:55:14PM -0700, Jesse Gross wrote:
> On Mon, Sep 1, 2014 at 1:10 AM, Simon Horman  
> wrote:
> > On Thu, Aug 28, 2014 at 10:12:49AM +0900, Simon Horman wrote:
> >> On Wed, Aug 27, 2014 at 03:03:53PM -0500, Jesse Gross wrote:
> >> > On Wed, Aug 27, 2014 at 11:51 AM, Ben Pfaff  wrote:
> >> > > On Wed, Aug 27, 2014 at 10:26:14AM +0900, Simon Horman wrote:
> >> > >> On Fri, Aug 22, 2014 at 08:30:08AM -0700, Ben Pfaff wrote:
> >> > >> > On Fri, Aug 22, 2014 at 09:19:41PM +0900, Simon Horman wrote:
> >> > >> What we would like to do is to provide something generally useful
> >> > >> which may be used as appropriate to:
> >> > >
> >> > > I'm going to skip past these ideas, which do sound interesting, because
> >> > > I think that they're more for Pravin and Jesse than for me.  I hope 
> >> > > that
> >> > > they will provide some reactions to them.
> >> >
> >> > For the hardware offloading piece in particular, I would take a look
> >> > at the discussion that has been going on in the netdev mailing list. I
> >> > think the general consensus (to the extent that there is one) is that
> >> > the hardware offload interface should be a block outside of OVS and
> >> > then OVS (most likely from userspace) configures it.
> >>
> >> Thanks, I am now digesting that conversation.
> >
> > A lively conversation indeed.
> >
> > We are left with two questions for you:
> >
> > 1. Would you look at a proposal (I have some rough code that even works)
> >for a select group action in the datapath prior to the finalisation
> >of the question of offloads infrastructure in the kernel?
> >
> >From our point of view we would ultimately like to use such an action to
> >offload to hardware. But it seems that there might be use-cases (not the
> >one that I have rough code for) where such an action may be useful. For
> >example to allow parts of IPVS to be used to provide stateful load
> >balancing.
> >
> >Put another way: it doesn't seem that a select group action is dependent on
> >offloads, though there are cases where they could be used together.
> 
> I agree that this is orthogonal to offloading and seems fine to do
> now. It seems particularly nice if we can use IPVS in a clean way,
> similar to what is currently being worked on for connection tracking.
> 
> I guess I'm not entirely sure how you plan to offload this to hardware
> so it's hard to say how it would intersect in the future. However, the
> current plan is to have offloading be directed for a higher point
> (i.e. userspace) and have the OVS kernel module remain a software path
> so probably it doesn't really matter.
> 
> However, I'll let Pravin comment since he'll be the one reviewing the code.

Thanks, I will respond to this separately as a response to Pravin's email.

> > 2. Would you consider a set of offload-hooks for Open vSwitch at this time?
> >
> >These could be backed by loading a module that implements the relevant
> >hooks. And in the longer term one such module (possibly to rule them
> >all) could be implemented using the kernel offload API that has
> >been the subject of recent lively discussion.
> 
> I'm not too excited about doing an interim offloading API for this,
> especially since I think the long term version is likely to be
> significantly different.

Thanks, that answers my question.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [patch net-next 01/13] openvswitch: split flow structures into ovs specific and generic ones

2014-09-03 Thread Jamal Hadi Salim

On 09/03/14 17:59, Pravin Shelar wrote:


Both of us are saying same thing.
What I meant was for OVS use-case, where OVS wants to use offload for
switching flows, vswitchd userspace process can program HW offload
using kernel HW offload APIs directly from userspace, rather than
going through OVS kernel module. If user wants to use some other tool,
then the tool can use same kernel HW offload APIs.


Ok, sorry, you are right - we are saying the same thing.

cheers,
jamal
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] Windows NetLink Socket - Support for asynchronous event notification

2014-09-03 Thread Eitan Eliahu
We keep an outstanding, out-of-band I/O request in the driver at all times.
Once an event is generated, the driver queues the event message, completes the
pending I/O and unblocks the calling thread by setting the event in the
overlapped structure in the NL socket. The thread will then read all event
messages synchronously through calls to nl_sock_recv().
---
 datapath-windows/include/OvsDpInterfaceExt.h |  1 +
 lib/netlink-socket.c | 82 ++--
 2 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/datapath-windows/include/OvsDpInterfaceExt.h 
b/datapath-windows/include/OvsDpInterfaceExt.h
index 73dfcbe..ab2088a 100644
--- a/datapath-windows/include/OvsDpInterfaceExt.h
+++ b/datapath-windows/include/OvsDpInterfaceExt.h
@@ -70,6 +70,7 @@
 /* Commands available under the OVS_WIN_CONTROL_FAMILY. */
 enum ovs_win_control_cmd {
 OVS_CTRL_CMD_WIN_GET_PID,
+OVS_CTRL_CMD_WIN_PEND_REQ
 };
 
 #endif /* __OVS_DP_INTERFACE_EXT_H_ */
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index a6be186..4b535f0 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -80,6 +80,7 @@ static int get_sock_pid_from_kernel(struct nl_sock *sock);
 struct nl_sock {
 #ifdef _WIN32
 HANDLE handle;
+OVERLAPPED overlapped;
 #else
 int fd;
 #endif
@@ -139,21 +140,30 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
 sock = xmalloc(sizeof *sock);
 
 #ifdef _WIN32
-sock->handle = CreateFileA(".\\OpenVSwitchDevice",
+sock->handle = CreateFile(TEXT(".\\OpenVSwitchDevice"),
GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING,
-   FILE_ATTRIBUTE_NORMAL, NULL);
-
-int last_error = GetLastError();
+   FILE_FLAG_OVERLAPPED, NULL);
 
 if (sock->handle == INVALID_HANDLE_VALUE) {
+int last_error = GetLastError();
+VLOG_ERR("fcntl: %s", ovs_strerror(last_error));
+goto error;
+}
+
+memset(&sock->overlapped, 0, sizeof sock->overlapped);
+sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+if (sock->overlapped.hEvent == NULL) {
+int last_error = GetLastError();
 VLOG_ERR("fcntl: %s", ovs_strerror(last_error));
 goto error;
 }
+
 #else
 sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
 if (sock->fd < 0) {
+int last_error = GetLastError();
 VLOG_ERR("fcntl: %s", ovs_strerror(errno));
 goto error;
 }
@@ -221,6 +231,9 @@ error:
 }
 }
 #ifdef _WIN32
+if (sock->overlapped.hEvent) {
+CloseHandle(sock->overlapped.hEvent);
+}
 if (sock->handle != INVALID_HANDLE_VALUE) {
 CloseHandle(sock->handle);
 }
@@ -248,6 +261,9 @@ nl_sock_destroy(struct nl_sock *sock)
 {
 if (sock) {
 #ifdef _WIN32
+if (sock->overlapped.hEvent) {
+CloseHandle(sock->overlapped.hEvent);
+}
 CloseHandle(sock->handle);
 #else
 close(sock->fd);
@@ -1040,12 +1056,70 @@ nl_dump_done(struct nl_dump *dump)
 return status == EOF ? 0 : status;
 }
 
+#ifdef _WIN32
+/* Pend an I/O request in the driver. The driver completes the I/O whenever
+* an event or a packet is ready to be read. Once the I/O is completed
+* the overlapped structure event associated with the pending I/O will be set
+*/
+static int
+pend_io_request(const struct nl_sock *sock)
+{
+struct ofpbuf request;
+uint64_t request_stub[128];
+struct ovs_header *ovs_header;
+struct nlmsghdr *nlmsg;
+uint32_t seq;
+int retval;
+int error;
+DWORD bytes;
+OVERLAPPED *overlapped = &sock->overlapped;
+
+int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) +
+   sizeof (struct ovs_header);
+
+ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
+
+seq = nl_sock_allocate_seq(sock, 1);
+nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
+  OVS_CTRL_CMD_WIN_PEND_REQ, OVS_WIN_CONTROL_VERSION);
+nlmsg = nl_msg_nlmsghdr(&request);
+nlmsg->nlmsg_seq = seq;
+
+ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
+ovs_header->dp_ifindex = 0;
+
+if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
+ofpbuf_data(&request), ofpbuf_size(&request),
+NULL, 0, &bytes, overlapped)) {
+error = GetLastError();
+/* Check if the I/O got pended */
+if (error != ERROR_IO_INCOMPLETE && error != ERROR_IO_PENDING) {
+VLOG_ERR("nl_sock_wait failed - %s\n", ovs_format_message(error));
+retval = EINVAL;
+goto done;
+}
+}
+else {
+/* The I/O was completed synchronously */
+poll_immediate_wake();
+}
+retval = 0;
+
+done:
+ofpbuf_uninit(&request);
+return retval;
+}
+#end

[ovs-dev] [PATCH v1] Windows NetLink Socket - Support for asynchronous event notification

2014-09-03 Thread Eitan Eliahu
We keep an outstanding, out-of-band I/O request in the driver at all times.
Once an event is generated, the driver queues the event message, completes the
pending I/O and unblocks the calling thread by setting the event in the
overlapped structure in the NL socket. The thread will then read all event
messages synchronously through calls to nl_sock_recv().

Signed-off-by: Eitan Eliahu elia...@vmware.com
---
 datapath-windows/include/OvsDpInterfaceExt.h |  1 +
 lib/netlink-socket.c | 82 ++--
 2 files changed, 79 insertions(+), 4 deletions(-)

diff --git a/datapath-windows/include/OvsDpInterfaceExt.h 
b/datapath-windows/include/OvsDpInterfaceExt.h
index 73dfcbe..ab2088a 100644
--- a/datapath-windows/include/OvsDpInterfaceExt.h
+++ b/datapath-windows/include/OvsDpInterfaceExt.h
@@ -70,6 +70,7 @@
 /* Commands available under the OVS_WIN_CONTROL_FAMILY. */
 enum ovs_win_control_cmd {
 OVS_CTRL_CMD_WIN_GET_PID,
+OVS_CTRL_CMD_WIN_PEND_REQ
 };
 
 #endif /* __OVS_DP_INTERFACE_EXT_H_ */
diff --git a/lib/netlink-socket.c b/lib/netlink-socket.c
index a6be186..4b535f0 100644
--- a/lib/netlink-socket.c
+++ b/lib/netlink-socket.c
@@ -80,6 +80,7 @@ static int get_sock_pid_from_kernel(struct nl_sock *sock);
 struct nl_sock {
 #ifdef _WIN32
 HANDLE handle;
+OVERLAPPED overlapped;
 #else
 int fd;
 #endif
@@ -139,21 +140,30 @@ nl_sock_create(int protocol, struct nl_sock **sockp)
 sock = xmalloc(sizeof *sock);
 
 #ifdef _WIN32
-sock->handle = CreateFileA(".\\OpenVSwitchDevice",
+sock->handle = CreateFile(TEXT(".\\OpenVSwitchDevice"),
GENERIC_READ | GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING,
-   FILE_ATTRIBUTE_NORMAL, NULL);
-
-int last_error = GetLastError();
+   FILE_FLAG_OVERLAPPED, NULL);
 
 if (sock->handle == INVALID_HANDLE_VALUE) {
+int last_error = GetLastError();
+VLOG_ERR("fcntl: %s", ovs_strerror(last_error));
+goto error;
+}
+
+memset(&sock->overlapped, 0, sizeof sock->overlapped);
+sock->overlapped.hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);
+if (sock->overlapped.hEvent == NULL) {
+int last_error = GetLastError();
 VLOG_ERR("fcntl: %s", ovs_strerror(last_error));
 goto error;
 }
+
 #else
 sock->fd = socket(AF_NETLINK, SOCK_RAW, protocol);
 if (sock->fd < 0) {
+int last_error = GetLastError();
 VLOG_ERR("fcntl: %s", ovs_strerror(errno));
 goto error;
 }
@@ -221,6 +231,9 @@ error:
 }
 }
 #ifdef _WIN32
+if (sock->overlapped.hEvent) {
+CloseHandle(sock->overlapped.hEvent);
+}
 if (sock->handle != INVALID_HANDLE_VALUE) {
 CloseHandle(sock->handle);
 }
@@ -248,6 +261,9 @@ nl_sock_destroy(struct nl_sock *sock)
 {
 if (sock) {
 #ifdef _WIN32
+if (sock->overlapped.hEvent) {
+CloseHandle(sock->overlapped.hEvent);
+}
 CloseHandle(sock->handle);
 #else
 close(sock->fd);
@@ -1040,12 +1056,70 @@ nl_dump_done(struct nl_dump *dump)
 return status == EOF ? 0 : status;
 }
 
+#ifdef _WIN32
+/* Pend an I/O request in the driver. The driver completes the I/O whenever
+* an event or a packet is ready to be read. Once the I/O is completed
+* the overlapped structure event associated with the pending I/O will be set
+*/
+static int
+pend_io_request(const struct nl_sock *sock)
+{
+struct ofpbuf request;
+uint64_t request_stub[128];
+struct ovs_header *ovs_header;
+struct nlmsghdr *nlmsg;
+uint32_t seq;
+int retval;
+int error;
+DWORD bytes;
+OVERLAPPED *overlapped = &sock->overlapped;
+
+int ovs_msg_size = sizeof (struct nlmsghdr) + sizeof (struct genlmsghdr) +
+   sizeof (struct ovs_header);
+
+ofpbuf_use_stub(&request, request_stub, sizeof request_stub);
+
+seq = nl_sock_allocate_seq(sock, 1);
+nl_msg_put_genlmsghdr(&request, 0, OVS_WIN_NL_CTRL_FAMILY_ID, 0,
+  OVS_CTRL_CMD_WIN_PEND_REQ, OVS_WIN_CONTROL_VERSION);
+nlmsg = nl_msg_nlmsghdr(&request);
+nlmsg->nlmsg_seq = seq;
+
+ovs_header = ofpbuf_put_uninit(&request, sizeof *ovs_header);
+ovs_header->dp_ifindex = 0;
+
+if (!DeviceIoControl(sock->handle, OVS_IOCTL_WRITE,
+ofpbuf_data(&request), ofpbuf_size(&request),
+NULL, 0, &bytes, overlapped)) {
+error = GetLastError();
+/* Check if the I/O got pended */
+if (error != ERROR_IO_INCOMPLETE && error != ERROR_IO_PENDING) {
+VLOG_ERR("nl_sock_wait failed - %s\n", ovs_format_message(error));
+retval = EINVAL;
+goto done;
+}
+}
+else {
+/* The I/O was completed synchronously */
+poll_immediate_wake();
+}
+retval = 0;
+
+done:
+ofpbu