For OVS running inside a VM that uses IVSHMEM to recognize ports created on
the host, ovs-vswitchd must be started with the --proc-type=secondary EAL
option.

When rings are created in a secondary process, functions such as
rte_eth_dev_configure() fail with the error code E_RTE_SECONDARY, i.e., the
operation is not allowed in secondary processes. Avoiding this requires some
changes to the way secondary processes handle DPDK rings.

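This patch changes dpdk_ring_create() to use rte_ring_lookup() instead of
rte_ring_create() when called from a secondary process. It also introduces two
functions, netdev_dpdk_ring_rxq_recv() and netdev_dpdk_ring_send__(), to handle
rx/tx on DPDK rings in secondary processes. Likewise, in a secondary process
dpdk_mp_get() uses rte_mempool_lookup() instead of rte_mempool_create(), and
dpdk_eth_dev_init() skips device configuration, queue setup and device start.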

Signed-off-by: Melvin Walls <mwall...@gmail.com>
Signed-off-by: Ethan Jackson <et...@nicira.com>
---
 lib/netdev-dpdk.c | 158 +++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 122 insertions(+), 36 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 5ae805e..5abe90f 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -227,6 +227,10 @@ struct netdev_dpdk {
     /* Identifier used to distinguish vhost devices from each other */
     char vhost_id[PATH_MAX];
 
+    /* Rings for secondary processes in IVSHMEM setups, NULL otherwise */
+    struct rte_ring *rx_ring;
+    struct rte_ring *tx_ring;
+
     /* In dpdk_list. */
     struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
 };
@@ -340,12 +344,16 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
             return NULL;
         }
 
-        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
-                                     MP_CACHE_SZ,
-                                     sizeof(struct rte_pktmbuf_pool_private),
-                                     rte_pktmbuf_pool_init, NULL,
-                                     ovs_rte_pktmbuf_init, NULL,
-                                     socket_id, 0);
+        if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+            dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
+                                         MP_CACHE_SZ,
+                                         sizeof(struct rte_pktmbuf_pool_private),
+                                         rte_pktmbuf_pool_init, NULL,
+                                         ovs_rte_pktmbuf_init, NULL,
+                                         socket_id, 0);
+        } else {
+            dmp->mp = rte_mempool_lookup(mp_name);
+        }
     } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
 
     if (dmp->mp == NULL) {
@@ -439,39 +447,41 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev) OVS_REQUIRES(dpdk_mutex)
     dev->up.n_rxq = MIN(info.max_rx_queues, dev->up.n_rxq);
     dev->real_n_txq = MIN(info.max_tx_queues, dev->up.n_txq);
 
-    diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->real_n_txq,
-                                 &port_conf);
-    if (diag) {
-        VLOG_ERR("eth dev config error %d. rxq:%d txq:%d", diag, dev->up.n_rxq,
-                 dev->real_n_txq);
-        return -diag;
-    }
-
-    for (i = 0; i < dev->real_n_txq; i++) {
-        diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
-                                      dev->socket_id, NULL);
+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+        diag = rte_eth_dev_configure(dev->port_id, dev->up.n_rxq, dev->real_n_txq,
+                                     &port_conf);
         if (diag) {
-            VLOG_ERR("eth dev tx queue setup error %d",diag);
+            VLOG_ERR("eth dev config error %d. rxq:%d txq:%d", diag, dev->up.n_rxq,
+                     dev->real_n_txq);
             return -diag;
         }
-    }
 
-    for (i = 0; i < dev->up.n_rxq; i++) {
-        diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE,
-                                      dev->socket_id,
-                                      NULL, dev->dpdk_mp->mp);
+        for (i = 0; i < dev->real_n_txq; i++) {
+            diag = rte_eth_tx_queue_setup(dev->port_id, i, NIC_PORT_TX_Q_SIZE,
+                                          dev->socket_id, NULL);
+            if (diag) {
+                VLOG_ERR("eth dev tx queue setup error %d",diag);
+                return -diag;
+            }
+        }
+
+        for (i = 0; i < dev->up.n_rxq; i++) {
+            diag = rte_eth_rx_queue_setup(dev->port_id, i, NIC_PORT_RX_Q_SIZE,
+                                          dev->socket_id,
+                                          NULL, dev->dpdk_mp->mp);
+            if (diag) {
+                VLOG_ERR("eth dev rx queue setup error %d",diag);
+                return -diag;
+            }
+        }
+
+        diag = rte_eth_dev_start(dev->port_id);
         if (diag) {
-            VLOG_ERR("eth dev rx queue setup error %d",diag);
+            VLOG_ERR("eth dev start error %d",diag);
             return -diag;
         }
     }
 
-    diag = rte_eth_dev_start(dev->port_id);
-    if (diag) {
-        VLOG_ERR("eth dev start error %d",diag);
-        return -diag;
-    }
-
     rte_eth_promiscuous_enable(dev->port_id);
     rte_eth_allmulticast_enable(dev->port_id);
 
@@ -532,6 +542,8 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
     OVS_REQUIRES(dpdk_mutex)
 {
     struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
+    char *rxq_name = xasprintf("%s_tx", netdev->up.name);
+    char *txq_name = xasprintf("%s_rx", netdev->up.name);
     int sid;
     int err = 0;
 
@@ -574,6 +586,19 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int port_no,
         }
     }
 
+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+        netdev->rx_ring = netdev->tx_ring = NULL;
+    } else {
+        netdev->rx_ring = rte_ring_lookup(rxq_name);
+        netdev->tx_ring = rte_ring_lookup(txq_name);
+        if (!netdev->rx_ring || !netdev->tx_ring) {
+            err = ENOMEM;
+        }
+    }
+
+    free(rxq_name);
+    free(txq_name);
+
     list_push_back(&dpdk_list, &netdev->list_node);
 
 unlock:
@@ -957,6 +982,36 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq_, struct dp_packet **packets,
     return 0;
 }
 
+static int
+netdev_dpdk_ring_rxq_recv(struct netdev_rxq *rxq_,
+                           struct dp_packet **packets, int *c)
+{
+    struct netdev_dpdk *netdev = netdev_dpdk_cast(rxq_->netdev);
+    struct rte_ring *rx_ring = netdev->rx_ring;
+    unsigned rx_pkts = NETDEV_MAX_BURST;
+
+    /* Only secondary processes read from 'rx_ring' directly: primary ones
+     * use netdev_dpdk_rxq_recv(), which performs operations that secondary
+     * processes are not allowed to perform. */
+    if (OVS_LIKELY(rte_eal_process_type() == RTE_PROC_PRIMARY)) {
+        return netdev_dpdk_rxq_recv(rxq_,packets,c);
+    }
+
+    while (OVS_UNLIKELY(rte_ring_dequeue_bulk(rx_ring, (void **)packets, rx_pkts) != 0) &&
+        rx_pkts > 0) {
+        rx_pkts = rte_ring_count(rx_ring);
+        rx_pkts = (unsigned)MIN(rx_pkts,NETDEV_MAX_BURST);
+    }
+   
+    if (!rx_pkts) {
+        return EAGAIN;          /* The ring's count reached zero. */
+    }
+
+    *c = rx_pkts;
+
+    return 0;
+}
+
 static void
 __netdev_dpdk_vhost_send(struct netdev *netdev, struct dp_packet **pkts,
                          int cnt, bool may_steal)
@@ -1147,6 +1202,20 @@ netdev_dpdk_vhost_send(struct netdev *netdev, int qid OVS_UNUSED, struct dp_pack
 }
 
 static inline void
+netdev_dpdk_ring_send__(struct netdev_dpdk *netdev,
+                        struct dp_packet **pkts, int cnt)
+{
+    struct rte_ring *tx_ring = netdev->tx_ring;
+    int rslt = 0;
+
+    if (tx_ring != NULL) {    /* Silently does nothing without a ring. */
+        do {
+            rslt = rte_ring_enqueue_bulk(tx_ring, (void **)pkts, cnt);
+        } while (rslt == -ENOBUFS);    /* NOTE(review): unbounded retry. */
+    }
+}
+
+static inline void
 netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
                    struct dp_packet **pkts, int cnt, bool may_steal)
 {
@@ -1812,8 +1881,13 @@ dpdk_ring_create(const char dev_name[], unsigned int port_no,
     }
 
     /* Create single consumer/producer rings, netdev does explicit locking. */
-    ivshmem->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
-                                        RING_F_SP_ENQ | RING_F_SC_DEQ);
+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+        ivshmem->cring_tx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
+                                            RING_F_SP_ENQ | RING_F_SC_DEQ);
+    } else {
+        ivshmem->cring_tx = rte_ring_lookup(ring_name);
+    }
+
     if (ivshmem->cring_tx == NULL) {
         rte_free(ivshmem);
         return ENOMEM;
@@ -1825,8 +1899,13 @@ dpdk_ring_create(const char dev_name[], unsigned int port_no,
     }
 
     /* Create single consumer/producer rings, netdev does explicit locking. */
-    ivshmem->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
-                                        RING_F_SP_ENQ | RING_F_SC_DEQ);
+    if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
+        ivshmem->cring_rx = rte_ring_create(ring_name, DPDK_RING_SIZE, SOCKET0,
+                                            RING_F_SP_ENQ | RING_F_SC_DEQ);
+    } else {
+        ivshmem->cring_rx = rte_ring_lookup(ring_name);
+    }
+
     if (ivshmem->cring_rx == NULL) {
         rte_free(ivshmem);
         return ENOMEM;
@@ -1888,7 +1967,14 @@ netdev_dpdk_ring_send(struct netdev *netdev_, int qid,
         dp_packet_set_rss_hash(pkts[i], 0);
     }
 
-    netdev_dpdk_send__(netdev, qid, pkts, cnt, may_steal);
+    /* Only use netdev_dpdk_send__() as a primary process. It leads to the execution
+     * of code that cannot be executed by secondary processes. */
+    if (OVS_LIKELY(rte_eal_process_type() == RTE_PROC_PRIMARY)) {
+        netdev_dpdk_send__(netdev, qid, pkts, cnt, may_steal);
+    } else {
+        netdev_dpdk_ring_send__(netdev, pkts, cnt);
+    }
+
     return 0;
 }
 
@@ -2101,7 +2187,7 @@ static const struct netdev_class dpdk_ring_class =
         netdev_dpdk_get_stats,
         netdev_dpdk_get_features,
         netdev_dpdk_get_status,
-        netdev_dpdk_rxq_recv);
+        netdev_dpdk_ring_rxq_recv);
 
 static const struct netdev_class OVS_UNUSED dpdk_vhost_cuse_class =
     NETDEV_DPDK_CLASS(
-- 
1.9.3 (Apple Git-50)
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to