I suppose my only question with this patch is why we can't just
allocate MIN_NB_MBUF-sized pools in the first place.

Acked-by: Ethan Jackson <et...@nicira.com>
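
As an aside for anyone reading this in the archive: the heart of the
change is a retry loop around rte_mempool_create() that halves the
requested mbuf count whenever the failure is ENOMEM.  A rough
standalone sketch of that pattern (my own illustration against the
DPDK API, not the exact OVS code; the function name and the
max_n/min_n parameters are placeholders):

#include <errno.h>
#include <rte_errno.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Illustrative only: try progressively smaller pools until one fits in
 * the available hugepage memory or we hit the lower bound. */
static struct rte_mempool *
create_pool_with_fallback(const char *name, unsigned max_n, unsigned min_n,
                          unsigned mbuf_size, int socket_id)
{
    struct rte_mempool *mp;
    unsigned n = max_n;

    do {
        mp = rte_mempool_create(name, n, mbuf_size,
                                RTE_MEMPOOL_CACHE_MAX_SIZE,
                                sizeof(struct rte_pktmbuf_pool_private),
                                rte_pktmbuf_pool_init, NULL,
                                rte_pktmbuf_init, NULL,
                                socket_id, 0);
        /* Retry only when the failure is lack of memory (hugepages). */
    } while (!mp && rte_errno == ENOMEM && (n /= 2) >= min_n);

    return mp;
}

The BUILD_ASSERT_DECLs in the patch make sure that every size tried this
way, down to MIN_NB_MBUF, stays a multiple of the mempool cache size, as
the DPDK documentation recommends.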


On Thu, Mar 12, 2015 at 11:04 AM, Daniele Di Proietto
<diproiet...@vmware.com> wrote:
> If rte_mempool_create() fails with ENOMEM, try asking for a smaller
> mempool. This patch enables OVS DPDK to run on systems without 1GB
> hugepages.
>
> Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
> ---
>  lib/netdev-dpdk.c | 46 +++++++++++++++++++++++++++++++++-------------
>  1 file changed, 33 insertions(+), 13 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 1ba8310..54bc318 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -67,9 +67,23 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
>  #define MBUF_SIZE(mtu)       (MTU_TO_MAX_LEN(mtu) + (512) + \
>                               sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
>
> -/* XXX: mempool size should be based on system resources. */
> -#define NB_MBUF              (4096 * 64)
> -#define MP_CACHE_SZ          (256 * 2)
> +/* Max and min number of packets in the mempool.  OVS tries to allocate a
> + * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
> + * enough hugepages) we keep halving the number until the allocation succeeds
> + * or we reach MIN_NB_MBUF */
> +
> +#define MAX_NB_MBUF          (4096 * 64)
> +#define MIN_NB_MBUF          (4096 * 4)
> +#define MP_CACHE_SZ          RTE_MEMPOOL_CACHE_MAX_SIZE
> +
> +/* MAX_NB_MBUF can be divided by 2 many times, until MIN_NB_MBUF */
> +BUILD_ASSERT_DECL(MAX_NB_MBUF % ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF) == 0);
> +
> +/* The smallest possible NB_MBUF that we're going to try should be a multiple
> + * of MP_CACHE_SZ. This is advised by DPDK documentation. */
> +BUILD_ASSERT_DECL((MAX_NB_MBUF / ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
> +                  % MP_CACHE_SZ == 0);
> +
>  #define SOCKET0              0
>
>  #define NIC_PORT_RX_Q_SIZE 2048  /* Size of Physical NIC RX Queue, Max (n+32<=4096)*/
> @@ -293,6 +307,7 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
>  {
>      struct dpdk_mp *dmp = NULL;
>      char mp_name[RTE_MEMPOOL_NAMESIZE];
> +    unsigned mp_size;
>
>      LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>          if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
> @@ -306,20 +321,25 @@ dpdk_mp_get(int socket_id, int mtu) OVS_REQUIRES(dpdk_mutex)
>      dmp->mtu = mtu;
>      dmp->refcount = 1;
>
> -    if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d", dmp->mtu,
> -                 dmp->socket_id) < 0) {
> -        return NULL;
> -    }
> +    mp_size = MAX_NB_MBUF;
> +    do {
> +        if (snprintf(mp_name, RTE_MEMPOOL_NAMESIZE, "ovs_mp_%d_%d_%u",
> +                     dmp->mtu, dmp->socket_id, mp_size) < 0) {
> +            return NULL;
> +        }
>
> -    dmp->mp = rte_mempool_create(mp_name, NB_MBUF, MBUF_SIZE(mtu),
> -                                 MP_CACHE_SZ,
> -                                 sizeof(struct rte_pktmbuf_pool_private),
> -                                 rte_pktmbuf_pool_init, NULL,
> -                                 ovs_rte_pktmbuf_init, NULL,
> -                                 socket_id, 0);
> +        dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
> +                                     MP_CACHE_SZ,
> +                                     sizeof(struct rte_pktmbuf_pool_private),
> +                                     rte_pktmbuf_pool_init, NULL,
> +                                     ovs_rte_pktmbuf_init, NULL,
> +                                     socket_id, 0);
> +    } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= MIN_NB_MBUF);
>
>      if (dmp->mp == NULL) {
>          return NULL;
> +    } else {
> +        VLOG_DBG("Allocated \"%s\" mempool with %u mbufs", mp_name, mp_size);
>      }
>
>      list_push_back(&dpdk_mp_list, &dmp->list_node);
> --
> 2.1.4
>
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
