>
>Hi Mark,
>
>Thanks for splitting the clean up, this looks better.
>comment below.
>
>
>On Tue, 16 Feb 2016 11:31:48 +0000
>Mark Kavanagh <mark.b.kavan...@intel.com> wrote:
>
>> Current mbuf initialization relies on magic numbers and does not
>> accomodate mbufs of different sizes.
>>
>> Resolve this issue by ensuring that mbufs are always aligned to a 1k
>> boundary (a typical DPDK NIC Rx buffer alignment).
>>
>> Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com>
>> ---
>>  lib/netdev-dpdk.c | 51 ++++++++++++++++++++++++++++++++++++---------------
>>  1 file changed, 36 insertions(+), 15 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index e4f789b..5bcd36d 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -69,14 +69,14 @@ static struct vlog_rate_limit rl = 
>> VLOG_RATE_LIMIT_INIT(5, 20);
>>   * The minimum mbuf size is limited to avoid scatter behaviour and drop in
>>   * performance for standard Ethernet MTU.
>>   */
>> -#define MTU_TO_MAX_LEN(mtu)  ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
>> -#define MBUF_SIZE_MTU(mtu)   (MTU_TO_MAX_LEN(mtu)        \
>> -                              + sizeof(struct dp_packet) \
>> -                              + RTE_PKTMBUF_HEADROOM)
>> -#define MBUF_SIZE_DRIVER     (2048                       \
>> -                              + sizeof (struct rte_mbuf) \
>> -                              + RTE_PKTMBUF_HEADROOM)
>> -#define MBUF_SIZE(mtu)       MAX(MBUF_SIZE_MTU(mtu), MBUF_SIZE_DRIVER)
>> +#define ETHER_HDR_MAX_LEN           (ETHER_HDR_LEN + ETHER_CRC_LEN + (2 *
>VLAN_HEADER_LEN))
>> +#define MTU_TO_FRAME_LEN(mtu)       ((mtu) + ETHER_HDR_LEN + ETHER_CRC_LEN)
>> +#define MTU_TO_MAX_FRAME_LEN(mtu)   ((mtu) + ETHER_HDR_MAX_LEN)
>> +#define FRAME_LEN_TO_MTU(frame_len) ((frame_len)- ETHER_HDR_LEN - 
>> ETHER_CRC_LEN)
>> +#define MBUF_SIZE(mtu)              ( MTU_TO_MAX_FRAME_LEN(mtu)   \
>> +                                    + sizeof(struct dp_packet)    \
>> +                                    + RTE_PKTMBUF_HEADROOM)
>> +#define NETDEV_DPDK_MBUF_ALIGN      1024
>>
>>  /* Max and min number of packets in the mempool.  OVS tries to allocate a
>>   * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
>> @@ -252,6 +252,22 @@ is_dpdk_class(const struct netdev_class *class)
>>      return class->construct == netdev_dpdk_construct;
>>  }
>>
>> +/* DPDK NIC drivers allocate RX buffers at a particular granularity, 
>> typically
>> + * aligned at 1k or less. If a declared mbuf size is not a multiple of this
>> + * value, insufficient buffers are allocated to accomodate the packet in its
>> + * entirety. Furthermore, certain drivers need to ensure that there is also
>> + * sufficient space in the Rx buffer to accommodate two VLAN tags (for QinQ
>> + * frames). If the RX buffer is too small, then the driver enables scatter 
>> RX
>> + * behaviour, which reduces performance. To prevent this, use a buffer size 
>> that
>> + * is closest to 'mtu', but which satisfies the aforementioned criteria.
>> + */
>> +static uint32_t
>> +dpdk_buf_size(int mtu)
>> +{
>> +    return ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) + RTE_PKTMBUF_HEADROOM),
>> +                     NETDEV_DPDK_MBUF_ALIGN);
>> +}
>> +
>>  /* XXX: use dpdk malloc for entire OVS. in fact huge page should be used
>>   * for all other segments data, bss and text. */
>>
>> @@ -313,7 +329,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>>  {
>>      struct rte_mbuf *m = _m;
>>
>> -    __rte_pktmbuf_init(mp, opaque_arg, _m, i);
>> +    rte_pktmbuf_init(mp, opaque_arg, _m, i);
>
>
>Nice, but at this point __rte_pktmbuf_init() is not used anymore.
>Although it is removed in the second patch, it makes sense to be
>part of this patch instead.

Thanks Flavio - I missed during the split. I'll add it back into this patch, as 
originally intended.
 
>
>
>
>>      dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len);
>>  }
>> @@ -324,6 +340,7 @@ dpdk_mp_get(int socket_id, int mtu) 
>> OVS_REQUIRES(dpdk_mutex)
>>      struct dpdk_mp *dmp = NULL;
>>      char mp_name[RTE_MEMPOOL_NAMESIZE];
>>      unsigned mp_size;
>> +    struct rte_pktmbuf_pool_private mbp_priv;
>>
>>      LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) {
>>          if (dmp->socket_id == socket_id && dmp->mtu == mtu) {
>> @@ -336,6 +353,8 @@ dpdk_mp_get(int socket_id, int mtu) 
>> OVS_REQUIRES(dpdk_mutex)
>>      dmp->socket_id = socket_id;
>>      dmp->mtu = mtu;
>>      dmp->refcount = 1;
>> +    mbp_priv.mbuf_data_room_size = MBUF_SIZE(mtu) - sizeof(struct 
>> dp_packet);
>> +    mbp_priv.mbuf_priv_size = sizeof (struct dp_packet) - sizeof (struct 
>> rte_mbuf);
>>
>>      mp_size = MAX_NB_MBUF;
>>      do {
>> @@ -347,7 +366,7 @@ dpdk_mp_get(int socket_id, int mtu) 
>> OVS_REQUIRES(dpdk_mutex)
>>          dmp->mp = rte_mempool_create(mp_name, mp_size, MBUF_SIZE(mtu),
>>                                       MP_CACHE_SZ,
>>                                       sizeof(struct 
>> rte_pktmbuf_pool_private),
>> -                                     rte_pktmbuf_pool_init, NULL,
>> +                                     rte_pktmbuf_pool_init, &mbp_priv,
>>                                       ovs_rte_pktmbuf_init, NULL,
>>                                       socket_id, 0);
>>      } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= 
>> MIN_NB_MBUF);
>> @@ -584,6 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>> port_no,
>>      struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>>      int sid;
>>      int err = 0;
>> +    uint32_t buf_size;
>>
>>      ovs_mutex_init(&netdev->mutex);
>>      ovs_mutex_lock(&netdev->mutex);
>> @@ -604,9 +624,10 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
>> port_no,
>>      netdev->type = type;
>>      netdev->flags = 0;
>>      netdev->mtu = ETHER_MTU;
>> -    netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu);
>> +    netdev->max_packet_len = ETHER_MAX_LEN;
>
>The value is correct but in other places we are using the macro
>MTU_TO_FRAME_LEN(dev->mtu).
>
>
>> -    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu);
>> +    buf_size = dpdk_buf_size(netdev->mtu);
>> +    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, 
>> FRAME_LEN_TO_MTU(buf_size));
>
>This is inconsistent with netdev_dpdk_set_mtu() which calls dpdk_mp_get()
>passing dev->mtu directly. Shouldn't that be fixed too?
>
>
>>      if (!netdev->dpdk_mp) {
>>          err = ENOMEM;
>>          goto unlock;
>> @@ -1440,14 +1461,14 @@ netdev_dpdk_set_mtu(const struct netdev *netdev, int 
>> mtu)
>>      old_mp = dev->dpdk_mp;
>>      dev->dpdk_mp = mp;
>>      dev->mtu = mtu;
>> -    dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
>> +    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>>
>>      err = dpdk_eth_dev_init(dev);
>>      if (err) {
>>          dpdk_mp_put(mp);
>>          dev->mtu = old_mtu;
>>          dev->dpdk_mp = old_mp;
>> -        dev->max_packet_len = MTU_TO_MAX_LEN(dev->mtu);
>> +        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
>>          dpdk_eth_dev_init(dev);
>>          goto out;
>>      }
>> @@ -1736,7 +1757,7 @@ netdev_dpdk_get_status(const struct netdev *netdev_, 
>> struct smap
>*args)
>>      smap_add_format(args, "numa_id", "%d", 
>> rte_eth_dev_socket_id(dev->port_id));
>>      smap_add_format(args, "driver_name", "%s", dev_info.driver_name);
>>      smap_add_format(args, "min_rx_bufsize", "%u", dev_info.min_rx_bufsize);
>> -    smap_add_format(args, "max_rx_pktlen", "%u", dev_info.max_rx_pktlen);
>> +    smap_add_format(args, "max_rx_pktlen", "%u", dev->max_packet_len);
>>      smap_add_format(args, "max_rx_queues", "%u", dev_info.max_rx_queues);
>>      smap_add_format(args, "max_tx_queues", "%u", dev_info.max_tx_queues);
>>      smap_add_format(args, "max_mac_addrs", "%u", dev_info.max_mac_addrs);
>
>Thanks,
>--
>fbl

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to