I suppose my only question with this patch is why we can't just allocate MIN_NB_MBUF-sized pools in the first place.
Acked-by: Ethan Jackson <et...@nicira.com>

On Thu, Mar 12, 2015 at 11:04 AM, Daniele Di Proietto
<diproiet...@vmware.com> wrote:
> If rte_mempool_create() fails with ENOMEM, try asking for a smaller
> mempool.  This patch enables OVS DPDK to run on systems without 1GB
> hugepages.
>
> Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
> ---
>  lib/netdev-dpdk.c | 46 +++++++++++++++++++++++++++++++++-------------
>  1 file changed, 33 insertions(+), 13 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 1ba8310..54bc318 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -67,9 +67,23 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>  #define MBUF_SIZE(mtu) (MTU_TO_MAX_LEN(mtu) + (512) + \
>                          sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
>
> -/* XXX: mempool size should be based on system resources. */
> -#define NB_MBUF (4096 * 64)
> -#define MP_CACHE_SZ (256 * 2)
> +/* Max and min number of packets in the mempool.  OVS tries to allocate a
> + * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
> + * enough hugepages) we keep halving the number until the allocation
> + * succeeds or we reach MIN_NB_MBUF. */
> +
> +#define MAX_NB_MBUF (4096 * 64)
> +#define MIN_NB_MBUF (4096 * 4)
> +#define MP_CACHE_SZ RTE_MEMPOOL_CACHE_MAX_SIZE
> +
> +/* MAX_NB_MBUF can be divided by 2 many times, until MIN_NB_MBUF. */
> +BUILD_ASSERT_DECL(MAX_NB_MBUF % ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF) == 0);
> +
> +/* The smallest possible NB_MBUF that we're going to try should be a
> + * multiple of MP_CACHE_SZ.  This is advised by the DPDK documentation. */
> +BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
> +                  % MP_CACHE_SZ == 0);
> +
>  #define SOCKET0 0
>
>  #define NIC_PORT_RX_Q_SIZE 2048 /* Size of Physical NIC RX Queue, Max (n+32<=4096) */
> @@ -293,6 +307,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
>  {
>      struct dpdk_mp *dmp = NULL;
>      char mp_name[RTE_MEMPOOL_NAMESIZE];
> +    unsigned mp_size;
>
>      LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>          if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> @@ -306,20 +321,25 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
>      dmp->mtu = mtu;
>      dmp->refcount = 1;
>
> -    if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d", dmp->mtu,
> -                 dmp->socket_id) < 0) {
> -        return NULL;
> -    }
> +    mp_size = MAX_NB_MBUF;
> +    do {
> +        if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d_%u",
> +                     dmp->mtu, dmp->socket_id, mp_size) < 0) {
> +            return NULL;
> +        }
>
> -    dmp->mp = rte_mempool_create(mp_name, NB_MBUF, MBUF_SIZE(mtu),
> -                                 MP_CACHE_SZ,
> -                                 sizeof(struct rte_pktmbuf_pool_private),
> -                                 rte_pktmbuf_pool_init, NULL,
> -                                 ovs_rte_pktmbuf_init, NULL,
> -                                 socket_id, 0);
> +        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
> +                                     MP_CACHE_SZ,
> +                                     sizeof(struct rte_pktmbuf_pool_private),
> +                                     rte_pktmbuf_pool_init, NULL,
> +                                     ovs_rte_pktmbuf_init, NULL,
> +                                     socket_id, 0);
> +    } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
>
>      if (dmp->mp == NULL) {
>          return NULL;
> +    } else {
> +        VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name, mp_size);
>      }
>
>      list_push_back(&dpdk_mp_list, &dmp->list_node);
> --
> 2.1.4
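
For reference, here is the sequence of sizes the new loop walks through. This is a standalone sketch, not part of the patch; the hard-coded MP_CACHE_SZ of 512 is an assumption standing in for RTE_MEMPOOL_CACHE_MAX_SIZE (the DPDK default), since the real macro comes from the DPDK configuration:

/* Standalone sketch: print the mempool sizes dpdk_mp_get() will try and
 * check the divisibility property that the second BUILD_ASSERT_DECL in
 * the patch enforces at compile time. */
#include <assert.h>
#include <stdio.h>

#define MAX_NB_MBUF (4096 * 64)   /* 262144, the first size attempted */
#define MIN_NB_MBUF (4096 * 4)    /*  16384, the last size attempted */
#define MP_CACHE_SZ 512           /* assumed RTE_MEMPOOL_CACHE_MAX_SIZE */

int
main(void)
{
    unsigned mp_size;

    for (mp_size = MAX_NB_MBUF; mp_size >= MIN_NB_MBUF; mp_size /= 2) {
        /* Each attempted size, down to and including the smallest, is a
         * multiple of the mempool cache size, as the DPDK docs advise. */
        assert(mp_size % MP_CACHE_SZ == 0);
        printf("would try %u mbufs\n", mp_size);
    }
    return 0;
}

Run, it prints 262144, 131072, 65536, 32768 and 16384: five attempts before dpdk_mp_get() returns NULL on a system that can't fit even the smallest pool.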