> >Hi Mark, > >Thanks for splitting the clean up, this looks better. >comment below. > > >On Tue, 16 Feb 2016 11:31:48 +0000 >Mark Kavanagh <mark.b.kavan...@intel.com> wrote: > >> Current mbuf initialization relies on magic numbers and does not >> accomodate mbufs of different sizes. >> >> Resolve this issue by ensuring that mbufs are always aligned to a 1k >> boundary (a typical DPDK NIC Rx buffer alignment). >> >> Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com> >> --- >> lib/netdev-dpdk.c | 51 ++++++++++++++++++++++++++++++++++++--------------- >> 1 file changed, 36 insertions(+), 15 deletions(-) >> >> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c >> index e4f789b..5bcd36d 100644 >> --- a/lib/netdev-dpdk.c >> +++ b/lib/netdev-dpdk.c >> @@ -69,14 +69,14 @@ static struct vlog_rate_limit rl = >> VLOG_RATE_LIMIT_INIT(5, 20); >> * The minimum mbuf size is limited to avoid scatter behaviour and drop in >> * performance for standard Ethernet MTU. >> */ >> -#define MTU_TO_MAX_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN) >> -#define MBUF_SIZE_MTU(mtu) (MTU_TO_MAX_LEN(mtu) \ >> - + sizeof(struct dp_packet) \ >> - + RTE_PKTMBUF_HEADROOM) >> -#define MBUF_SIZE_DRIVER (2048 \ >> - + sizeof (struct rte_mbuf) \ >> - + RTE_PKTMBUF_HEADROOM) >> -#define MBUF_SIZE(mtu) MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER) >> +#define ETHER_HDR_MAX_LEN (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 * >VLAN_HEADER_LEN)) >> +#define MTU_TO_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN) >> +#define MTU_TO_MAX_FRAME_LEN(mtu) ((mtu) + ETHER_HDR_MAX_LEN) >> +#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN - >> ETHER_CRC_LEN) >> +#define MBUF_SIZE(mtu) ( MTU_TO_MAX_FRAME_LEN(mtu) \ >> + + sizeof(struct dp_packet) \ >> + + RTE_PKTMBUF_HEADROOM) >> +#define NETDEV_DPDK_MBUF_ALIGN 1024 >> >> /* Max and min number of packets in the mempool. OVS tries to allocate a >> * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have >> @@ -252,6 +252,22 @@ is_dpdk_class(const struct netdev_class *class) >> return class->construct == netdev_dpdk_construct; >> } >> >> +/* DPDK NIC drivers allocate RX buffers at a particular granularity, >> typically >> + * aligned at 1k or less. If a declared mbuf size is not a multiple of this >> + * value, insufficient buffers are allocated to accomodate the packet in its >> + * entirety. Furthermore, certain drivers need to ensure that there is also >> + * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ >> + * frames). If the RX buffer is too small, then the driver enables scatter >> RX >> + * behaviour, which reduces performance. To prevent this, use a buffer size >> that >> + * is closest to 'mtu', but which satisfies the aforementioned criteria. >> + */ >> +static uint32_t >> +dpdk_buf_size(int mtu) >> +{ >> + return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM), >> + NETDEV_DPDK_MBUF_ALIGN); >> +} >> + >> /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used >> * for all other segments data, bss and text. */ >> >> @@ -313,7 +329,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp, >> { >> struct rte_mbuf *m = _m; >> >> - __rte_pktmbuf_init(mp, opaque_arg, _m, i); >> + rte_pktmbuf_init(mp, opaque_arg, _m, i); > > >Nice, but at this point __rte_pktmbuf_init() is not used anymore. >Although it is removed in the second patch, it makes sense to be >part of this patch instead.
Thanks Flavio - I missed during the split. I'll add it back into this patch, as originally intended. > > > >> dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len); >> } >> @@ -324,6 +340,7 @@ dpdk_mp_get(int socket_id, int mtu) >> OVS_REQUIRES(dpdk_mutex) >> struct dpdk_mp *dmp = NULL; >> char mp_name[RTE_MEMPOOL_NAMESIZE]; >> unsigned mp_size; >> + struct rte_pktmbuf_pool_private mbp_priv; >> >> LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { >> if (dmp->socket_id == socket_id && dmp->mtu == mtu) { >> @@ -336,6 +353,8 @@ dpdk_mp_get(int socket_id, int mtu) >> OVS_REQUIRES(dpdk_mutex) >> dmp->socket_id = socket_id; >> dmp->mtu = mtu; >> dmp->refcount = 1; >> + mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct >> dp_packet); >> + mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct >> rte_mbuf); >> >> mp_size = MAX_NB_MBUF; >> do { >> @@ -347,7 +366,7 @@ dpdk_mp_get(int socket_id, int mtu) >> OVS_REQUIRES(dpdk_mutex) >> dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu), >> MP_CACHE_SZ, >> sizeof(struct >> rte_pktmbuf_pool_private), >> - rte_pktmbuf_pool_init, NULL, >> + rte_pktmbuf_pool_init, &mbp_priv, >> ovs_rte_pktmbuf_init, NULL, >> socket_id, 0); >> } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= >> MIN_NB_MBUF); >> @@ -584,6 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int >> port_no, >> struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); >> int sid; >> int err = 0; >> + uint32_t buf_size; >> >> ovs_mutex_init(&netdev->mutex); >> ovs_mutex_lock(&netdev->mutex); >> @@ -604,9 +624,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int >> port_no, >> netdev->type = type; >> netdev->flags = 0; >> netdev->mtu = ETHER_MTU; >> - netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); >> + netdev->max_packet_len = ETHER_MAX_LEN; > >The value is correct but in other places we are using the macro >MTU_TO_FRAME_LEN(dev->mtu). > > >> - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); >> + buf_size = dpdk_buf_size(netdev->mtu); >> + netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, >> FRAME_LEN_TO_MTU(buf_size)); > >This is inconsistent with netdev_dpdk_set_mtu() which calls dpdk_mp_get() >passing dev->mtu directly. Shouldn't that be fixed too? > > >> if (!netdev->dpdk_mp) { >> err = ENOMEM; >> goto unlock; >> @@ -1440,14 +1461,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int >> mtu) >> old_mp = dev->dpdk_mp; >> dev->dpdk_mp = mp; >> dev->mtu = mtu; >> - dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu); >> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); >> >> err = dpdk_eth_dev_init(dev); >> if (err) { >> dpdk_mp_put(mp); >> dev->mtu = old_mtu; >> dev->dpdk_mp = old_mp; >> - dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu); >> + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); >> dpdk_eth_dev_init(dev); >> goto out; >> } >> @@ -1736,7 +1757,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, >> struct smap >*args) >> smap_add_format(args, "numa_id", "%d", >> rte_eth_dev_socket_id(dev->port_id)); >> smap_add_format(args, "driver_name", "%s", dev_info.driver_name); >> smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize); >> - smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen); >> + smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len); >> smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues); >> smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues); >> smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs); > >Thanks, >-- >fbl _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev