Hey Daniele,

If you're happy with this patch, could you add your 'signed-off-by', seeing as 
you contributed code towards it?

Thanks in advance,
Mark

>Current mbuf initialization relies on magic numbers and does not
>accomodate mbufs of different sizes.
>
>Resolve this issue by ensuring that mbufs are always aligned to a 1k
>boundary (a typical DPDK NIC Rx buffer alignment).
>
>Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com>
>---
> lib/netdev-dpdk.c | 51 ++++++++++++++++++++++++++++++++++++---------------
> 1 file changed, 36 insertions(+), 15 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>index e4f789b..5bcd36d 100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -69,14 +69,14 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
>20);
>  * The minimum mbuf size is limited to avoid scatter behaviour and drop in
>  * performance for standard Ethernet MTU.
>  */
>-#define MTU_TO_MAX_LEN(mtu)  ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
>-#define MBUF_SIZE_MTU(mtu)   (MTU_TO_MAX_LEN(mtu)        \
>-                              + sizeof(struct dp_packet) \
>-                              + RTE_PKTMBUF_HEADROOM)
>-#define MBUF_SIZE_DRIVER     (2048                       \
>-                              + sizeof (struct rte_mbuf) \
>-                              + RTE_PKTMBUF_HEADROOM)
>-#define MBUF_SIZE(mtu)       MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER)
>+#define ETHER_HDR_MAX_LEN           (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 * 
>VLAN_HEADER_LEN))
>+#define MTU_TO_FRAME_LEN(mtu)       ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
>+#define MTU_TO_MAX_FRAME_LEN(mtu)   ((mtu) + ETHER_HDR_MAX_LEN)
>+#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN - 
>ETHER_CRC_LEN)
>+#define MBUF_SIZE(mtu)              ( MTU_TO_MAX_FRAME_LEN(mtu)   \
>+                                    + sizeof(struct dp_packet)    \
>+                                    + RTE_PKTMBUF_HEADROOM)
>+#define NETDEV_DPDK_MBUF_ALIGN      1024
>
> /* Max and min number of packets in the mempool.  OVS tries to allocate a
>  * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
>@@ -252,6 +252,22 @@ is_dpdk_class(const struct netdev_class *class)
>     return class->construct == netdev_dpdk_construct;
> }
>
>+/* DPDK NIC drivers allocate RX buffers at a particular granularity, typically
>+ * aligned at 1k or less. If a declared mbuf size is not a multiple of this
>+ * value, insufficient buffers are allocated to accomodate the packet in its
>+ * entirety. Furthermore, certain drivers need to ensure that there is also
>+ * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ
>+ * frames). If the RX buffer is too small, then the driver enables scatter RX
>+ * behaviour, which reduces performance. To prevent this, use a buffer size 
>that
>+ * is closest to 'mtu', but which satisfies the aforementioned criteria.
>+ */
>+static uint32_t
>+dpdk_buf_size(int mtu)
>+{
>+    return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM),
>+                     NETDEV_DPDK_MBUF_ALIGN);
>+}
>+
> /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used
>  * for all other segments data, bss and text. */
>
>@@ -313,7 +329,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
> {
>     struct rte_mbuf *m = _m;
>
>-    __rte_pktmbuf_init(mp, opaque_arg, _m, i);
>+    rte_pktmbuf_init(mp, opaque_arg, _m, i);
>
>     dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
> }
>@@ -324,6 +340,7 @@ dpdk_mp_get(int socket_id, int mtu) 
>OVS_REQUIRES(dpdk_mutex)
>     struct dpdk_mp *dmp = NULL;
>     char mp_name[RTE_MEMPOOL_NAMESIZE];
>     unsigned mp_size;
>+    struct rte_pktmbuf_pool_private mbp_priv;
>
>     LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>         if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
>@@ -336,6 +353,8 @@ dpdk_mp_get(int socket_id, int mtu) 
>OVS_REQUIRES(dpdk_mutex)
>     dmp->socket_id = socket_id;
>     dmp->mtu = mtu;
>     dmp->refcount = 1;
>+    mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct dp_packet);
>+    mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct 
>rte_mbuf);
>
>     mp_size = MAX_NB_MBUF;
>     do {
>@@ -347,7 +366,7 @@ dpdk_mp_get(int socket_id, int mtu) 
>OVS_REQUIRES(dpdk_mutex)
>         dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>                                      MP_CACHE_SZ,
>                                      sizeof(struct rte_pktmbuf_pool_private),
>-                                     rte_pktmbuf_pool_init, NULL,
>+                                     rte_pktmbuf_pool_init, &mbp_priv,
>                                      ovs_rte_pktmbuf_init, NULL,
>                                      socket_id, 0);
>     } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= 
> MIN_NB_MBUF);
>@@ -584,6 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>port_no,
>     struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>     int sid;
>     int err = 0;
>+    uint32_t buf_size;
>
>     ovs_mutex_init(&netdev->mutex);
>     ovs_mutex_lock(&netdev->mutex);
>@@ -604,9 +624,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>port_no,
>     netdev->type = type;
>     netdev->flags = 0;
>     netdev->mtu = ETHER_MTU;
>-    netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
>+    netdev->max_packet_len = ETHER_MAX_LEN;
>
>-    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
>+    buf_size = dpdk_buf_size(netdev->mtu);
>+    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, 
>FRAME_LEN_TO_MTU(buf_size));
>     if (!netdev->dpdk_mp) {
>         err = ENOMEM;
>         goto unlock;
>@@ -1440,14 +1461,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int 
>mtu)
>     old_mp = dev->dpdk_mp;
>     dev->dpdk_mp = mp;
>     dev->mtu = mtu;
>-    dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
>+    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>
>     err = dpdk_eth_dev_init(dev);
>     if (err) {
>         dpdk_mp_put(mp);
>         dev->mtu = old_mtu;
>         dev->dpdk_mp = old_mp;
>-        dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
>+        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>         dpdk_eth_dev_init(dev);
>         goto out;
>     }
>@@ -1736,7 +1757,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, 
>struct smap *args)
>     smap_add_format(args, "numa_id", "%d", 
> rte_eth_dev_socket_id(dev->port_id));
>     smap_add_format(args, "driver_name", "%s", dev_info.driver_name);
>     smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize);
>-    smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen);
>+    smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len);
>     smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues);
>     smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues);
>     smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs);
>--
>1.9.3
>
>_______________________________________________
>dev mailing list
>dev@openvswitch.org
>http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to