If rte_mempool_create() fails with ENOMEM, retry with a smaller mempool.
This patch enables OVS DPDK to run on systems without 1GB hugepages.

Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
---
 lib/netdev-dpdk.c | 46 +++++++++++++++++++++++++++++++++-------------
 1 file changed, 33 insertions(+), 13 deletions(-)
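
A minimal standalone sketch of the halving strategy, for anyone who wants to
try it outside of OVS (try_alloc() and the fixed "budget" are hypothetical
stand-ins for rte_mempool_create() and the available hugepage memory; this is
not part of the patch).  A worked check of the two BUILD_ASSERT_DECLs follows
the patch.

    #include <errno.h>
    #include <stdio.h>

    #define MAX_NB_MBUF          (4096 * 64)
    #define MIN_NB_MBUF          (4096 * 4)

    /* Hypothetical allocator standing in for rte_mempool_create(): it
     * fails with ENOMEM whenever the request exceeds the budget. */
    static void *
    try_alloc(unsigned n_mbufs, unsigned budget)
    {
        if (n_mbufs > budget) {
            errno = ENOMEM;
            return NULL;
        }
        return (void *) 1;          /* Any non-NULL token will do here. */
    }

    int
    main(void)
    {
        unsigned budget = 100000;   /* Pretend only this many mbufs fit. */
        unsigned mp_size = MAX_NB_MBUF;
        void *mp;

        /* Same shape as the loop in the patch: halve the request on
         * ENOMEM until it succeeds or drops below MIN_NB_MBUF. */
        do {
            mp = try_alloc(mp_size, budget);
        } while (!mp && errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);

        if (mp) {
            printf("allocated %u mbufs\n", mp_size);  /* 65536 here. */
        } else {
            printf("allocation failed\n");
        }
        return 0;
    }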

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1ba8310..54bc318 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -67,9 +67,23 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 #define MBUF_SIZE(mtu)       (MTU_TO_MAX_LEN(mtu) + (512) + \
                              sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
 
-/* XXX: mempool size should be based on system resources. */
-#define NB_MBUF              (4096 * 64)
-#define MP_CACHE_SZ          (256 * 2)
+/* Max and min number of packets in the mempool.  OVS tries to allocate a
+ * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
+ * enough hugepages) it keeps halving the number until the allocation
+ * succeeds or MIN_NB_MBUF is reached. */
+
+#define MAX_NB_MBUF          (4096 * 64)
+#define MIN_NB_MBUF          (4096 * 4)
+#define MP_CACHE_SZ          RTE_MEMPOOL_CACHE_MAX_SIZE
+
+/* MAX_NB_MBUF can be halved repeatedly until it reaches MIN_NB_MBUF. */
+BUILD_ASSERT_DECL(MAX_NB_MBUF % ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF) == 0);
+
+/* The smallest possible NB_MBUF that we're going to try should be a multiple
+ * of MP_CACHE_SZ, as advised by the DPDK documentation. */
+BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
+                  % MP_CACHE_SZ == 0);
+
 #define SOCKET0              0
 
 #define NIC_PORT_RX_Q_SIZE 2048  /* Size of Physical NIC RX Queue, Max (n+32<=4096)*/
@@ -293,6 +307,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
 {
     struct dpdk_mp *dmp = NULL;
     char mp_name[RTE_MEMPOOL_NAMESIZE];
+    unsigned mp_size;
 
     LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
         if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
@@ -306,20 +321,25 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
     dmp->mtu = mtu;
     dmp->refcount = 1;
 
-    if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d", dmp->mtu,
-                 dmp->socket_id) < 0) {
-        return NULL;
-    }
+    mp_size = MAX_NB_MBUF;
+    do {
+        if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d_%u",
+                     dmp->mtu, dmp->socket_id, mp_size) < 0) {
+            return NULL;
+        }
 
-    dmp->mp = rte_mempool_create(mp_name, NB_MBUF, MBUF_SIZE(mtu),
-                                 MP_CACHE_SZ,
-                                 sizeof(struct rte_pktmbuf_pool_private),
-                                 rte_pktmbuf_pool_init, NULL,
-                                 ovs_rte_pktmbuf_init, NULL,
-                                 socket_id, 0);
+        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
+                                     MP_CACHE_SZ,
+                                     sizeof(struct rte_pktmbuf_pool_private),
+                                     rte_pktmbuf_pool_init, NULL,
+                                     ovs_rte_pktmbuf_init, NULL,
+                                     socket_id, 0);
+    } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
 
     if (dmp->mp == NULL) {
         return NULL;
+    } else {
+        VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name, mp_size);
     }
 
     list_push_back(&dpdk_mp_list, &dmp->list_node);
-- 
2.1.4
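
For the record, a worked check of the two BUILD_ASSERT_DECLs above (my
arithmetic, not part of the patch): MAX_NB_MBUF = 4096 * 64 = 262144 and
MIN_NB_MBUF = 4096 * 4 = 16384, so MAX_NB_MBUF / MIN_NB_MBUF = 16, which is
already a power of two, and 262144 % 16 == 0, so the first assert holds.  The
smallest pool size ever requested is 262144 / 16 = 16384; assuming the default
RTE_MEMPOOL_CACHE_MAX_SIZE of 512, 16384 % 512 == 0, so the second assert
holds as well.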
