Optimize VxLAN tunneling performance in the userspace datapath by using
the flow director feature of Fortville (i40e) NIC DPDK ports. OVS uses
the metadata reported by the NIC to improve flow lookup performance for
VxLAN packets.

Signed-off-by: Sugesh Chandran <sugesh.chand...@intel.com>
---
 lib/automake.mk      |   2 +
 lib/dpdk-i40e-ofld.c | 266 +++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/dpdk-i40e-ofld.h |  59 ++++++++++++
 lib/dpif-netdev.c    | 118 ++++++++++++++++++++++-
 lib/netdev-dpdk.c    |  41 +++++++-
 5 files changed, 481 insertions(+), 5 deletions(-)
 create mode 100644 lib/dpdk-i40e-ofld.c
 create mode 100644 lib/dpdk-i40e-ofld.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 27a1669..da48479 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -366,6 +366,8 @@ endif
 
 if DPDK_NETDEV
 lib_libopenvswitch_la_SOURCES += \
+       lib/dpdk-i40e-ofld.c \
+       lib/dpdk-i40e-ofld.h \
        lib/netdev-dpdk.c \
        lib/netdev-dpdk.h
 endif
diff --git a/lib/dpdk-i40e-ofld.c b/lib/dpdk-i40e-ofld.c
new file mode 100644
index 0000000..3ea7084
--- /dev/null
+++ b/lib/dpdk-i40e-ofld.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2016 Intel Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#include "dpdk-i40e-ofld.h"
+#include "errno.h"
+#include "ovs-thread.h"
+#include "openvswitch/vlog.h"
+#include "netdev-provider.h"
+#include "rte_pci_dev_ids.h"
+#include "rte_ethdev.h"
+
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+VLOG_DEFINE_THIS_MODULE(dpdk_hw_ofld);
+
+#define VXLAN_DST_PORT          4789
+#define VXLAN_HLEN                  50
+#define MAX_FDIR_RULES          8000
+
+static uint32_t total_fdir_ids;
+static struct ovs_mutex hw_ofld_mutex = OVS_MUTEX_INITIALIZER;
+
+/*
+ * Atomically increments the count of installed flow director entries.
+ * Returns the new count on success, or '0' if the table is already
+ * full.  Only MAX_FDIR_RULES (8000) entries are supported in FVL.
+ *
+ * Both the limit check and the returned value are computed under
+ * 'hw_ofld_mutex'; doing either outside the lock would let two
+ * threads race past the limit or return another thread's count.
+ */
+static inline uint32_t
+i40e_fdir_entry_cnt_inc(void)
+{
+    uint32_t cnt = 0;
+
+    ovs_mutex_lock(&hw_ofld_mutex);
+    if (total_fdir_ids < MAX_FDIR_RULES) {
+        cnt = ++total_fdir_ids;
+    }
+    ovs_mutex_unlock(&hw_ofld_mutex);
+    return cnt;
+}
+
+/* Atomically decrements the flow director entry count, saturating
+ * at zero. */
+static inline void
+i40e_fdir_entry_cnt_decr(void)
+{
+    ovs_mutex_lock(&hw_ofld_mutex);
+    if (total_fdir_ids) {
+        total_fdir_ids--;
+    }
+    ovs_mutex_unlock(&hw_ofld_mutex);
+}
+
+/*
+ * Release the hardware offloading functionality from the dpdk-port.
+ *
+ * Clears the per-device i40e offload flag under 'hw_ofld_mutex' so
+ * the datapath stops treating packets from this port as offloaded.
+ * Always returns 0.
+ */
+int
+dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port)
+{
+    ovs_mutex_lock(&hw_ofld_mutex);
+    set_i40e_ofld_flag(dpdk_port, 0);
+    ovs_mutex_unlock(&hw_ofld_mutex);
+    return 0;
+}
+
+/*
+ * Configure a DPDK ethdev, enabling FVL flow-director based VxLAN
+ * tunnel offload when the port is an Intel XL710 (FVL) NIC.  For any
+ * other device this degrades to a plain rte_eth_dev_configure() with
+ * the offload flag cleared.  Returns 0 on success, otherwise the
+ * (negative) DPDK error code from configuration or FDIR flush.
+ */
+int
+dpdk_eth_dev_hw_ofld_init(struct netdev_dpdk *dev,
+                                        int n_rxq, int n_txq,
+                                        struct rte_eth_conf *port_conf)
+{
+    int err = 0;
+    int i;
+    struct rte_eth_dev_info info;
+    uint16_t vendor_id, device_id;
+
+    rte_eth_dev_info_get(get_dpdk_port_id(dev), &info);
+    vendor_id = info.pci_dev->id.vendor_id;
+    device_id = info.pci_dev->id.device_id;
+    /* Configure vxlan offload only if it's an FVL NIC. */
+    if (vendor_id != PCI_VENDOR_ID_INTEL || device_id !=
+                                            I40E_DEV_ID_SFP_XL710) {
+        ovs_mutex_lock(&hw_ofld_mutex);
+        set_i40e_ofld_flag(dev, 0);
+        ovs_mutex_unlock(&hw_ofld_mutex);
+        err = rte_eth_dev_configure(get_dpdk_port_id(dev),
+                                    n_rxq, n_txq, port_conf);
+        return err;
+    }
+    ovs_mutex_lock(&hw_ofld_mutex);
+    set_i40e_ofld_flag(dev, 1);
+    ovs_mutex_unlock(&hw_ofld_mutex);
+    /* Configure FVL FDIR VxLAN tunnel handling. */
+    port_conf->fdir_conf.mode = RTE_FDIR_MODE_PERFECT;
+    port_conf->fdir_conf.flex_conf.nb_payloads = 1;
+    port_conf->fdir_conf.flex_conf.flex_set[0].type = RTE_ETH_L4_PAYLOAD;
+    /* Need to initialize all 16 flex bytes, no matter how many are
+     * really used; possibly a DPDK bug?? */
+    for (i = 0; i < RTE_ETH_FDIR_MAX_FLEXLEN; i++) {
+        port_conf->fdir_conf.flex_conf.flex_set[0].src_offset[i] = i;
+    }
+    err = rte_eth_dev_configure(get_dpdk_port_id(dev),
+                                n_rxq, n_txq, port_conf);
+    if (err) {
+        VLOG_ERR("Failed to configure DPDK port with hardware offload");
+        return err;
+    }
+    /* Clean all FDIR entries if any. */
+    err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(dev),
+            RTE_ETH_FILTER_FDIR, RTE_ETH_FILTER_FLUSH, NULL);
+    return err;
+}
+
+/*
+ * Install or remove a flow director rule for a VxLAN flow in hardware.
+ *
+ * 'hw_flow_id' is programmed as the FDIR soft ID that the NIC reports
+ * back with matching packets; it must be non-zero.  Rules are only
+ * programmed for UDP flows whose destination port is the VxLAN port.
+ *
+ * Returns 0 on success or when offload does not apply to this port or
+ * flow, ECANCELED for an invalid flow ID, or the negative DPDK error
+ * code when rule programming fails.
+ */
+int
+set_up_hw_offload_port_rule(struct netdev *netdev__,
+                                const struct flow *flow,
+                                const uint32_t hw_flow_id,
+                                const bool is_add_rule)
+{
+    int err = 0;
+    uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN] = { 0 };
+    uint32_t *vni;
+    enum rte_filter_op filter_op;
+    struct rte_eth_fdir_filter entry = { 0 };
+    struct netdev_dpdk *netdev;
+
+    netdev = netdev_dpdk_cast(netdev__);
+    if (!is_i40e_ofld_enable(netdev)) {
+        return 0;
+    }
+    entry.soft_id = hw_flow_id;
+    if (!entry.soft_id) {
+        VLOG_DBG("Invalid flow ID, can't install rule in the NIC for "
+                         "hardware offload");
+        return ECANCELED;
+    }
+    /* Install rules in NIC only for VxLAN flows. */
+    if (ntohs(flow->tp_dst) != VXLAN_DST_PORT) {
+        return 0;
+    }
+    entry.action.flex_off = 0;  /* use 0 by default */
+    entry.input.flow_ext.vlan_tci = 0;  /* ignored by i40e fdir */
+    entry.action.behavior = RTE_ETH_FDIR_PASSTHRU;
+    entry.action.report_status = RTE_ETH_FDIR_REPORT_ID_FLEX_4;
+    entry.input.flow_type = RTE_ETH_FLOW_NONFRAG_IPV4_UDP;
+    entry.input.flow.ip4_flow.src_ip = flow->nw_src;
+    entry.input.flow.ip4_flow.dst_ip = flow->nw_dst;
+    entry.input.flow.udp4_flow.dst_port = htons(VXLAN_DST_PORT);
+    entry.input.flow.udp4_flow.src_port = flow->tp_src;
+    /* The VNI is matched via the flex bytes of the VxLAN header. */
+    vni = (uint32_t *)&flexbytes[4];
+    *vni = flow->tunnel.tun_id << 8;
+    memcpy(entry.input.flow_ext.flexbytes, flexbytes,
+                  RTE_ETH_FDIR_MAX_FLEXLEN);
+    entry.action.rx_queue = 0;
+    filter_op = is_add_rule ? RTE_ETH_FILTER_ADD :
+                                          RTE_ETH_FILTER_DELETE;
+    err = rte_eth_dev_filter_ctrl(get_dpdk_port_id(netdev),
+             RTE_ETH_FILTER_FDIR, filter_op, &entry);
+    if (err < 0) {
+        VLOG_DBG("flow director programming error in NIC: (%d)\n", err);
+        return err;
+    }
+    /* Adjust the entry count only for rules the NIC actually accepted,
+     * so the counter cannot drift when programming fails. */
+    if (is_add_rule) {
+        if (!i40e_fdir_entry_cnt_inc()) {
+            VLOG_DBG("Can't configure rule on NIC, flow director "
+                    "entries hit max limit");
+        }
+    } else {
+        i40e_fdir_entry_cnt_decr();
+    }
+    return 0;
+}
+
+/*
+ * Partition 'in_packets' into packets carrying a valid hardware FDIR
+ * flow ID ('hw_packets') and all others ('non_hw_packets').  Packets
+ * whose reported flow ID has no live EMC entry are demoted to the
+ * normal path with the FDIR flag cleared.  For accepted packets the
+ * outer VxLAN header (VXLAN_HLEN bytes) is stripped and the RSS hash
+ * re-finished so inner-flow EMC keys stay distinct from outer ones.
+ * Returns the number of packets stored in 'hw_packets'.
+ */
+static int
+i40e_dpdk_port_get_hw_ofld_pkts(struct
+                 dp_netdev_pmd_thread *pmd, struct dp_packet
+                 **in_packets, struct dp_packet **hw_packets,
+                 struct dp_packet **non_hw_packets,
+                 uint32_t cnt)
+{
+    int hw_pkt_cnt = 0, norm_pkt_cnt = 0;
+    uint32_t i;   /* unsigned to match 'cnt'; avoids sign-compare. */
+    const struct dp_netdev_flow *flow;
+    struct rte_mbuf *mbuf;
+
+    for (i = 0; i < cnt; i++) {
+        mbuf = (struct rte_mbuf *)in_packets[i];
+        if (mbuf->ol_flags & PKT_RX_FDIR_ID) {
+            flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf,
+                                                     mbuf->hash.fdir.hi);
+            if (!flow) {
+                /* Bogus flow in hw, cannot find it in OVS EMC. */
+                mbuf->ol_flags &= ~PKT_RX_FDIR_ID;
+                non_hw_packets[norm_pkt_cnt++] = in_packets[i];
+                continue;
+            }
+            dp_packet_reset_packet(in_packets[i], VXLAN_HLEN);
+            mbuf->ol_flags |= PKT_RX_RSS_HASH;
+            mbuf->hash.rss = hash_finish(mbuf->hash.rss, 1);
+            hw_packets[hw_pkt_cnt++] = in_packets[i];
+        } else {
+            non_hw_packets[norm_pkt_cnt++] = in_packets[i];
+        }
+    }
+    return hw_pkt_cnt;
+}
+
+/*
+ * Process the packets based on hardware offload configuration.
+ */
+void
+hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
+                             struct netdev_rxq *netdev_rxq,
+                             struct dp_packet **packets, int cnt,
+                             odp_port_t port_no)
+{
+    struct dp_packet *hw_ofld_packets[NETDEV_MAX_BURST] = { 0 };
+    struct dp_packet *orig_packets[NETDEV_MAX_BURST] = { 0 };
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_rxq->netdev);
+    int hw_pkt_cnt;
+
+    /* Without offload on this port, everything takes the normal path. */
+    if (!is_i40e_ofld_enable(netdev)) {
+        dp_netdev_input(pmd, packets, cnt, port_no);
+        return;
+    }
+    hw_pkt_cnt = i40e_dpdk_port_get_hw_ofld_pkts(pmd, packets,
+                                                      hw_ofld_packets,
+                                                      orig_packets, cnt);
+    /* Process the offloaded and normal packet streams separately. */
+    if (hw_pkt_cnt) {
+        dp_netdev_input(pmd, hw_ofld_packets, hw_pkt_cnt, port_no);
+    }
+    if (cnt - hw_pkt_cnt) {
+        dp_netdev_input(pmd, orig_packets, cnt - hw_pkt_cnt, port_no);
+    }
+}
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
diff --git a/lib/dpdk-i40e-ofld.h b/lib/dpdk-i40e-ofld.h
new file mode 100644
index 0000000..1aad246
--- /dev/null
+++ b/lib/dpdk-i40e-ofld.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2016 Intel Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DPDK_I40E_OFLD_H_
+#define DPDK_I40E_OFLD_H_
+
+#include <config.h>
+
+#include "dp-packet.h"
+#include "netdev.h"
+
+/*
+ * Macro to enable/disable HW OFFLOAD feature for DPDK.
+ * 1 :- Enable HW_OFFLOAD support in OVS
+ * 0 :- Disable HW_OFFLOAD support in OVS
+ */
+#define DPDK_I40E_TNL_OFFLOAD_ENABLE        1
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+
+struct netdev_dpdk;
+struct dp_netdev_pmd_thread;
+struct dp_netdev_flow;
+
+struct netdev_dpdk *netdev_dpdk_cast(const struct netdev *netdev);
+extern inline bool is_i40e_ofld_enable(const struct netdev_dpdk *netdev);
+extern inline void set_i40e_ofld_flag(struct netdev_dpdk *netdev, bool flag);
+extern inline int get_dpdk_port_id(struct netdev_dpdk *dpdk_port);
+int dpdk_eth_dev_hw_ofld_init(struct netdev_dpdk *dev, int n_rxq, int n_txq,
+                              struct rte_eth_conf *port_conf);
+int dpdk_hw_ofld_port_release(struct netdev_dpdk *dpdk_port);
+int set_up_hw_offload_port_rule(struct netdev *netdev__,
+                                const struct flow *flow,
+                                const uint32_t hw_flow_id,
+                                const bool is_add_rule);
+void hw_ofld_dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
+                             struct netdev_rxq *netdev_rxq,
+                             struct dp_packet **packets, int cnt,
+                             odp_port_t port_no);
+const struct dp_netdev_flow *lookup_hw_offload_flow_for_fdirid(
+                            const struct dp_netdev_pmd_thread *pmd,
+                            struct rte_mbuf *mbuf, uint32_t flow_id);
+void dp_netdev_input(struct dp_netdev_pmd_thread *, struct dp_packet **, 
+                     int cnt, odp_port_t port_no);
+
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
+#endif /* DPDK_I40E_OFLD_H_ */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index cf574ad..d79b239 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -70,6 +70,7 @@
 #include "util.h"
 
 #include "openvswitch/vlog.h"
+#include "dpdk-i40e-ofld.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif_netdev);
 
@@ -478,7 +479,7 @@ static void dp_netdev_execute_actions(struct 
dp_netdev_pmd_thread *pmd,
                                       bool may_steal,
                                       const struct nlattr *actions,
                                       size_t actions_len);
-static void dp_netdev_input(struct dp_netdev_pmd_thread *,
+void dp_netdev_input(struct dp_netdev_pmd_thread *,
                             struct dp_packet **, int cnt, odp_port_t port_no);
 static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
                                   struct dp_packet **, int cnt);
@@ -1455,6 +1456,28 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread 
*pmd,
     flow->dead = true;
 
     dp_netdev_flow_unref(flow);
+
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+    struct dp_netdev_port *dp_port;
+    int err;
+    odp_port_t in_port = flow->flow.in_port.odp_port;
+    err = get_port_by_number(pmd->dp, in_port, &dp_port);
+    if (err) {
+        VLOG_WARN("Cannot get the port information, hardware offload may "
+                "not be functional");
+        return;
+    }
+    if(strcmp(dp_port->type, "dpdk")) {
+        /* No hardware offload on a non-DPDK port. */
+        return;
+    }
+    /* Remove the hardware offload rule if exists.*/
+    if(set_up_hw_offload_port_rule(dp_port->netdev, &flow->flow,
+            dp_netdev_flow_hash(&(flow->ufid)), 0)) {
+        VLOG_DBG("Failed to delete the hardware offload rule");
+        return;
+    }
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
 }
 
 static void
@@ -2059,6 +2082,32 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
         ds_destroy(&ds);
     }
 
+    /*
+     * Configure the hardware offload for tunnel while flows are getting
+     * inserted in OVS.
+     */
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+    struct dp_netdev_port *dp_port;
+    int err;
+    odp_port_t in_port = flow->flow.in_port.odp_port;
+    err = get_port_by_number(pmd->dp, in_port, &dp_port);
+    if (err) {
+        VLOG_WARN("Cannot get the port information, Failed to configure "
+                            "hardware offload");
+        goto out;
+    }
+    if (strcmp(dp_port->type, "dpdk")) {
+        /* No hardware offload on a non-DPDK port */
+        goto out;
+    }
+    /* Install the rule in hw; redundant, might overwrite if it exists. */
+    if (set_up_hw_offload_port_rule(dp_port->netdev, &flow->flow,
+            dp_netdev_flow_hash(&flow->ufid), 1)) {
+        VLOG_ERR("Failed to install the hardware offload rule");
+        goto out;
+    }
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
+out:
     return flow;
 }
 
@@ -2575,7 +2624,19 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread 
*pmd,
         *recirc_depth_get() = 0;
 
         cycles_count_start(pmd);
+
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+        /* Check if the source port is DPDK */
+        if (packets[0]->source == DPBUF_DPDK) {
+            hw_ofld_dp_netdev_input(pmd, rxq, packets, cnt, port->port_no);
+        }
+        else {
+            dp_netdev_input(pmd, packets, cnt, port->port_no);
+        }
+#else
         dp_netdev_input(pmd, packets, cnt, port->port_no);
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
+
         cycles_count_end(pmd, PMD_CYCLES_PROCESSING);
     } else if (error != EAGAIN && error != EOPNOTSUPP) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
@@ -3321,7 +3382,6 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct 
dp_packet *packet_,
         flow->tunnel.metadata.present.len = orig_tunnel.metadata.present.len;
         flow->tunnel.flags |= FLOW_TNL_F_UDPIF;
     }
-
     return err;
 }
 
@@ -3430,6 +3490,7 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct 
dp_packet **packets,
     struct emc_cache *flow_cache = &pmd->flow_cache;
     struct netdev_flow_key *key = &keys[0];
     size_t i, n_missed = 0, n_dropped = 0;
+    struct rte_mbuf *mbuf;
 
     for (i = 0; i < cnt; i++) {
         struct dp_netdev_flow *flow;
@@ -3454,7 +3515,18 @@ emc_processing(struct dp_netdev_pmd_thread *pmd, struct 
dp_packet **packets,
         key->len = 0; /* Not computed yet. */
         key->hash = dpif_netdev_packet_get_rss_hash(packet, &key->mf);
 
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+        mbuf = (struct rte_mbuf *)packet;
+        if (mbuf->ol_flags & PKT_RX_FDIR_ID) {
+            flow = lookup_hw_offload_flow_for_fdirid(pmd, mbuf, 0);
+        }
+        else {
+            flow = emc_lookup(flow_cache, key);
+        }
+#else
         flow = emc_lookup(flow_cache, key);
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
+
         if (OVS_LIKELY(flow)) {
             dp_netdev_queue_batches(packet, flow, &key->mf, batches,
                                     n_batches);
@@ -3651,7 +3723,7 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
     }
 }
 
-static void
+void
 dp_netdev_input(struct dp_netdev_pmd_thread *pmd,
                 struct dp_packet **packets, int cnt,
                 odp_port_t port_no)
@@ -4290,3 +4362,43 @@ dpcls_lookup(const struct dpcls *cls, const struct 
netdev_flow_key keys[],
     }
     return false;                     /* Some misses. */
 }
+
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+/*
+ * EMC lookup function on 'flow id' reported by NIC.
+ *
+ * Walks the EMC positions hashed from the packet's RSS hash and
+ * returns the first live flow found there.  When 'flow_id' is
+ * non-zero it is compared against the hash of the candidate flow's
+ * ufid: a mismatch means the NIC-reported ID collided with a
+ * different EMC entry, so NULL is returned and the caller falls back
+ * to the software path.  Also returns NULL when the mbuf carries no
+ * RSS hash or on EMC miss.
+ */
+const struct dp_netdev_flow *
+lookup_hw_offload_flow_for_fdirid(const struct
+                 dp_netdev_pmd_thread *pmd, struct rte_mbuf *mbuf,
+                 uint32_t flow_id)
+{
+    const struct emc_cache *flow_cache = &pmd->flow_cache;
+    struct netdev_flow_key key;
+    struct emc_entry *current_entry;
+
+    key.len = 0;
+    /* The EMC is keyed on the RSS hash; without one we cannot look
+     * the flow up. */
+    if (OVS_LIKELY(mbuf->ol_flags & PKT_RX_RSS_HASH)) {
+        key.hash = mbuf->hash.rss;
+    }
+    else {
+        return NULL;
+    }
+    EMC_FOR_EACH_POS_WITH_HASH(flow_cache, current_entry, key.hash) {
+        if (current_entry->key.hash == key.hash
+            && emc_entry_alive(current_entry)) {
+            if (OVS_UNLIKELY(flow_id && dp_netdev_flow_hash(
+                                       &current_entry->flow->ufid) !=
+                                       flow_id)) {
+                /* Hash collision in emc, fallback to software path */
+                return NULL;
+            }
+            return current_entry->flow;
+        }
+    }
+    /* XXX :: An improved classifier lookup needed here without any miniflow
+     * extract to keep it performant. Until then fallback to software based
+     * packet forwarding on EMC miss.
+     */
+     return NULL;
+}
+#endif /* DPDK_I40E_TNL_OFFLOAD_ENABLE */
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f402354..2954f83 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -56,6 +56,7 @@
 #include "rte_mbuf.h"
 #include "rte_meter.h"
 #include "rte_virtio_net.h"
+#include "dpdk-i40e-ofld.h"
 
 VLOG_DEFINE_THIS_MODULE(dpdk);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -112,7 +113,7 @@ static char *vhost_sock_dir = NULL;   /* Location of 
vhost-user sockets */
  */
 #define VHOST_ENQ_RETRY_USECS 100
 
-static const struct rte_eth_conf port_conf = {
+static struct rte_eth_conf port_conf = {
     .rxmode = {
         .mq_mode = ETH_MQ_RX_RSS,
         .split_hdr_size = 0,
@@ -331,6 +332,9 @@ struct netdev_dpdk {
 
     /* Identifier used to distinguish vhost devices from each other */
     char vhost_id[PATH_MAX];
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+    bool i40e_ofld_enable; /* hardware/NIC offload flag*/
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
 
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
@@ -346,6 +350,24 @@ struct netdev_rxq_dpdk {
     int port_id;
 };
 
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+/* These accessors are called from other translation units
+ * (dpdk-i40e-ofld.c, dpif-netdev.c).  A plain 'inline' definition in
+ * a .c file does not provide an external definition under C99 inline
+ * semantics (C99 6.7.4), which can fail at link time, so define them
+ * as ordinary external functions. */
+
+/* Returns true when i40e hardware offload is enabled on 'netdev'. */
+bool is_i40e_ofld_enable(const struct netdev_dpdk *netdev)
+{
+    return netdev->i40e_ofld_enable;
+}
+
+/* Sets the i40e hardware offload flag on 'netdev' to 'flag'. */
+void set_i40e_ofld_flag(struct netdev_dpdk *netdev,
+                                                bool flag)
+{
+    netdev->i40e_ofld_enable = flag;
+}
+
+/* Returns the DPDK port id for 'dpdk_port'. */
+int get_dpdk_port_id(struct netdev_dpdk *dpdk_port)
+{
+    return dpdk_port->port_id;
+}
+#endif /* DPDK_I40E_TNL_OFFLOAD_ENABLE */
+
 static bool dpdk_thread_is_pmd(void);
 
 static int netdev_dpdk_construct(struct netdev *);
@@ -539,10 +561,21 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
n_rxq, int n_txq)
             VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
         }
 
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+        diag = (!dev->i40e_ofld_enable && dev->type == DPDK_DEV_ETH) ?
+                    dpdk_eth_dev_hw_ofld_init(dev, n_rxq, n_txq, &port_conf) :
+                    rte_eth_dev_configure(dev->port_id,
+                    n_rxq, n_txq, &port_conf);
+        if (diag) {
+            /* rte_dev_configure error */
+            break;
+        }
+#else
         diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &port_conf);
         if (diag) {
             break;
         }
+#endif //DPDK_I40E_TNL_OFFLOAD_ENABLE
 
         for (i = 0; i < n_txq; i++) {
             diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
@@ -637,7 +670,7 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) 
OVS_REQUIRES(dpdk_mutex)
     return 0;
 }
 
-static struct netdev_dpdk *
+struct netdev_dpdk *
 netdev_dpdk_cast(const struct netdev *netdev)
 {
     return CONTAINER_OF(netdev, struct netdev_dpdk, up);
@@ -861,6 +894,10 @@ netdev_dpdk_destruct(struct netdev *netdev_)
     rte_free(dev->tx_q);
     list_remove(&dev->list_node);
     dpdk_mp_put(dev->dpdk_mp);
+
+#ifdef DPDK_I40E_TNL_OFFLOAD_ENABLE
+        dpdk_hw_ofld_port_release(dev);
+#endif /* DPDK_I40E_TNL_OFFLOAD_ENABLE */
     ovs_mutex_unlock(&dpdk_mutex);
 }
 
-- 
1.9.1

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to