On Tue, 16 Feb 2016 11:31:49 +0000
Mark Kavanagh <mark.b.kavan...@intel.com> wrote:

> Add support for Jumbo Frames to DPDK-enabled port types,
> using single-segment-mbufs.
> 
> Using this approach, the amount of memory allocated for each mbuf
> to store frame data is increased to a value greater than 1518B
> (typical Ethernet maximum frame length). The increased space
> available in the mbuf means that an entire Jumbo Frame can be carried
> in a single mbuf, as opposed to partitioning it across multiple mbuf
> segments.
> 
> The amount of space allocated to each mbuf to hold frame data is
> defined dynamically by the user when adding a DPDK port to a bridge.
> If an MTU value is not supplied, or the user-supplied value is invalid,
> the MTU for the port defaults to standard Ethernet MTU (i.e. 1500B).
> 
> Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com>
> ---
>  INSTALL.DPDK.md   |   1 -
>  lib/netdev-dpdk.c | 259 
> +++++++++++++++++++++++++++++++++---------------------
>  2 files changed, 159 insertions(+), 101 deletions(-)
> 
> diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md
> index d892788..722fb9e 100644
> --- a/INSTALL.DPDK.md
> +++ b/INSTALL.DPDK.md
> @@ -881,7 +881,6 @@ determines how many queues can be used by the guest.
>  Restrictions:
>  -------------
>  
> -  - Work with 1500 MTU, needs few changes in DPDK lib to fix this issue.
>    - Currently DPDK port does not make use any offload functionality.
>    - DPDK-vHost support works with 1G huge pages.
>  
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 5bcd36d..93a0930 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -77,6 +77,7 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
> 20);
>                                      + sizeof(struct dp_packet)    \
>                                      + RTE_PKTMBUF_HEADROOM)
>  #define NETDEV_DPDK_MBUF_ALIGN      1024
> +#define NETDEV_DPDK_MAX_FRAME_LEN   13312
>  
>  /* Max and min number of packets in the mempool.  OVS tries to allocate a
>   * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have
> @@ -294,34 +295,6 @@ free_dpdk_buf(struct dp_packet *p)
>  }
>  
>  static void
> -__rte_pktmbuf_init(struct rte_mempool *mp,
> -                   void *opaque_arg OVS_UNUSED,
> -                   void *_m,
> -                   unsigned i OVS_UNUSED)
> -{
> -    struct rte_mbuf *m = _m;
> -    uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet);
> -
> -    RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet));
> -
> -    memset(m, 0, mp->elt_size);
> -
> -    /* start of buffer is just after mbuf structure */
> -    m->buf_addr = (char *)m + sizeof(struct dp_packet);
> -    m->buf_physaddr = rte_mempool_virt2phy(mp, m) +
> -                    sizeof(struct dp_packet);
> -    m->buf_len = (uint16_t)buf_len;
> -
> -    /* keep some headroom between start of buffer and data */
> -    m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len);
> -
> -    /* init some constant fields */
> -    m->pool = mp;
> -    m->nb_segs = 1;
> -    m->port = 0xff;
> -}
> -
> -static void
>  ovs_rte_pktmbuf_init(struct rte_mempool *mp,
>                       void *opaque_arg OVS_UNUSED,
>                       void *_m,
> @@ -450,6 +423,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
> n_rxq, int n_txq)
>  {
>      int diag = 0;
>      int i;
> +    struct rte_eth_conf conf = port_conf;
>  
>      /* A device may report more queues than it makes available (this has
>       * been observed for Intel xl710, which reserves some of them for
> @@ -461,7 +435,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int 
> n_rxq, int n_txq)
>              VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq);
>          }
>  
> -        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &port_conf);
> +        if (dev->mtu > ETHER_MTU) {
> +            conf.rxmode.jumbo_frame = 1;
> +            conf.rxmode.max_rx_pkt_len = dev->max_packet_len;
> +        } else {
> +            conf.rxmode.jumbo_frame = 0;
> +            conf.rxmode.max_rx_pkt_len = 0;
> +        }
> +
> +        diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf);
>          if (diag) {
>              break;
>          }
> @@ -602,8 +584,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
> port_no,
>  {
>      struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_);
>      int sid;
> -    int err = 0;
> -    uint32_t buf_size;
>  
>      ovs_mutex_init(&netdev->mutex);
>      ovs_mutex_lock(&netdev->mutex);
> @@ -623,15 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
> port_no,
>      netdev->port_id = port_no;
>      netdev->type = type;
>      netdev->flags = 0;
> -    netdev->mtu = ETHER_MTU;
> -    netdev->max_packet_len = ETHER_MAX_LEN;
> -
> -    buf_size = dpdk_buf_size(netdev->mtu);
> -    netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, 
> FRAME_LEN_TO_MTU(buf_size));
> -    if (!netdev->dpdk_mp) {
> -        err = ENOMEM;
> -        goto unlock;
> -    }
> +    netdev->mtu = 0;

This zero is special because it means we can set MTU otherwise
the mtu_request is just ignored, so maybe we could define
MTU_NOT_SET or something like that.


>      netdev_->n_txq = NR_QUEUE;
>      netdev_->n_rxq = NR_QUEUE;
> @@ -640,20 +612,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int 
> port_no,
>  
>      if (type == DPDK_DEV_ETH) {
>          netdev_dpdk_alloc_txq(netdev, NR_QUEUE);
> -        err = dpdk_eth_dev_init(netdev);
> -        if (err) {
> -            goto unlock;
> -        }
>      }
>  
>      list_push_back(&dpdk_list, &netdev->list_node);
>  
> -unlock:
> -    if (err) {
> -        rte_free(netdev->tx_q);
> -    }
>      ovs_mutex_unlock(&netdev->mutex);
> -    return err;
> +    return 0;
>  }
>  
>  static int
> @@ -671,6 +635,27 @@ dpdk_dev_parse_name(const char dev_name[], const char 
> prefix[],
>      return 0;
>  }
>  
> +static void
> +dpdk_dev_parse_mtu(const struct smap *args, int *mtu)
> +{
> +    const char *mtu_str = smap_get(args, "mtu_request");
> +    char *end_ptr = NULL;
> +    int local_mtu;
> +
> +    if (mtu_str) {
> +        local_mtu = strtoul(mtu_str, &end_ptr, 0);
> +    }
> +    if (!mtu_str || local_mtu < ETHER_MTU ||
> +        local_mtu > FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN) ||
> +        *end_ptr != '\0') {
> +        local_mtu = ETHER_MTU;
> +        VLOG_WARN("Invalid or missing mtu_request parameter - defaulting to 
> %d.\n",
> +                   local_mtu);

This prints a warning message by default.  It should only tell if the MTU
is invalid and perhaps if the mtu different than the default as an
informative message.


> +    }
> +
> +    *mtu = local_mtu;
> +}
> +
>  static int
>  vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex)
>  {
> @@ -801,15 +786,72 @@ netdev_dpdk_get_config(const struct netdev *netdev, 
> struct smap *args)
>      smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq);
>      smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq);
>      smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq);
> +    smap_add_format(args, "mtu", "%d", dev->mtu);
>      ovs_mutex_unlock(&dev->mutex);
>  
>      return 0;
>  }
>  
> +/* Set the mtu of DPDK_DEV_ETH ports */
> +static int
> +netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int err, dpdk_mtu;
> +    uint32_t buf_size;
> +    struct dpdk_mp *mp;
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    ovs_mutex_lock(&dev->mutex);
> +    if (dev->mtu == mtu) {
> +        err = 0;
> +        goto out;
> +    }
> +
> +    buf_size = dpdk_buf_size(mtu);
> +    dpdk_mtu = FRAME_LEN_TO_MTU(buf_size);
> +
> +    mp = dpdk_mp_get(dev->socket_id, dpdk_mtu);
> +    if (!mp) {
> +        err = ENOMEM;
> +        goto out;
> +    }
> +
> +    rte_eth_dev_stop(dev->port_id);
> +
> +    dev->dpdk_mp = mp;
> +    dev->mtu = mtu;
> +    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> +
> +    err = dpdk_eth_dev_init(dev);
> +    if (err) {
> +        VLOG_WARN("Unable to set MTU '%d' for '%s'; falling back to default "
> +                  "MTU '%d'\n", mtu, dev->up.name, ETHER_MTU);
> +        dpdk_mp_put(mp);
> +        dev->mtu = ETHER_MTU;
> +        mp = dpdk_mp_get(dev->socket_id, dev->mtu);
> +        if (!mp) {
> +            err = ENOMEM;
> +            goto out;
> +        }
> +        dev->dpdk_mp = mp;
> +        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> +        dpdk_eth_dev_init(dev);
> +        goto out;
> +    } else {
> +        netdev_change_seq_changed(netdev);
> +    }
> +out:
> +    ovs_mutex_unlock(&dev->mutex);
> +    ovs_mutex_unlock(&dpdk_mutex);
> +    return err;
> +}
> +
>  static int
>  netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args)
>  {
>      struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int mtu;
>  
>      ovs_mutex_lock(&dev->mutex);
>      netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
> @@ -817,6 +859,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
> struct smap *args)
>      netdev_change_seq_changed(netdev);
>      ovs_mutex_unlock(&dev->mutex);
>  
> +    if (!dev->mtu) {
> +        dpdk_dev_parse_mtu(args, &mtu);
> +        return netdev_dpdk_set_mtu(netdev, mtu);
> +    }

Can we actually check if dev->mtu is different from mtu_request and then
print a warning if so? Because the requested config will be ignored.


>      return 0;
>  }
>  
> @@ -1435,53 +1481,6 @@ netdev_dpdk_get_mtu(const struct netdev *netdev, int 
> *mtup)
>  }
>  
>  static int
> -netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu)
> -{
> -    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -    int old_mtu, err;
> -    struct dpdk_mp *old_mp;
> -    struct dpdk_mp *mp;
> -
> -    ovs_mutex_lock(&dpdk_mutex);
> -    ovs_mutex_lock(&dev->mutex);
> -    if (dev->mtu == mtu) {
> -        err = 0;
> -        goto out;
> -    }
> -
> -    mp = dpdk_mp_get(dev->socket_id, dev->mtu);
> -    if (!mp) {
> -        err = ENOMEM;
> -        goto out;
> -    }
> -
> -    rte_eth_dev_stop(dev->port_id);
> -
> -    old_mtu = dev->mtu;
> -    old_mp = dev->dpdk_mp;
> -    dev->dpdk_mp = mp;
> -    dev->mtu = mtu;
> -    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> -
> -    err = dpdk_eth_dev_init(dev);
> -    if (err) {
> -        dpdk_mp_put(mp);
> -        dev->mtu = old_mtu;
> -        dev->dpdk_mp = old_mp;
> -        dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> -        dpdk_eth_dev_init(dev);
> -        goto out;
> -    }
> -
> -    dpdk_mp_put(old_mp);
> -    netdev_change_seq_changed(netdev);
> -out:
> -    ovs_mutex_unlock(&dev->mutex);
> -    ovs_mutex_unlock(&dpdk_mutex);
> -    return err;
> -}
> -
> -static int
>  netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier);
>  
>  static int
> @@ -2024,6 +2023,57 @@ dpdk_vhost_user_class_init(void)
>      return 0;
>  }
>  
> +/* Set the mtu of DPDK_DEV_VHOST ports */
> +static int
> +netdev_dpdk_vhost_set_mtu(const struct netdev *netdev, int mtu)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int err = 0;
> +    struct dpdk_mp *mp;
> +
> +    ovs_mutex_lock(&dpdk_mutex);
> +    ovs_mutex_lock(&dev->mutex);
> +    if (dev->mtu == mtu) {
> +        err = 0;
> +        goto out;
> +    }
> +
> +    mp = dpdk_mp_get(dev->socket_id, mtu);
> +    if (!mp) {
> +        err = ENOMEM;
> +        goto out;
> +    }
> +
> +    dev->dpdk_mp = mp;
> +    dev->mtu = mtu;
> +    dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu);
> +
> +    netdev_change_seq_changed(netdev);
> +out:
> +    ovs_mutex_unlock(&dev->mutex);
> +    ovs_mutex_unlock(&dpdk_mutex);
> +    return err;
> +}
> +
> +static int
> +netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args)
> +{
> +    struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> +    int mtu;
> +
> +    ovs_mutex_lock(&dev->mutex);
> +    netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq",
> +                                               netdev->requested_n_rxq), 1);
> +    netdev_change_seq_changed(netdev);
> +    ovs_mutex_unlock(&dev->mutex);
> +
> +    if (!dev->mtu) {
> +        dpdk_dev_parse_mtu(args, &mtu);
> +        return netdev_dpdk_vhost_set_mtu(netdev, mtu);
> +    }
> +    return 0;
> +}
> +
>  static void
>  dpdk_common_init(void)
>  {
> @@ -2160,8 +2210,9 @@ unlock_dpdk:
>      return err;
>  }
>  
> -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \
> -    GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV)          \
> +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SET_CONFIG, \
> +        MULTIQ, SEND, SET_MTU, GET_CARRIER, GET_STATS, GET_FEATURES,   \
> +        GET_STATUS, RXQ_RECV)                                          \
>  {                                                             \
>      NAME,                                                     \
>      INIT,                       /* init */                    \
> @@ -2173,7 +2224,7 @@ unlock_dpdk:
>      DESTRUCT,                                                 \
>      netdev_dpdk_dealloc,                                      \
>      netdev_dpdk_get_config,                                   \
> -    netdev_dpdk_set_config,                                   \
> +    SET_CONFIG            ,                                   \
>      NULL,                       /* get_tunnel_config */       \
>      NULL,                       /* build header */            \
>      NULL,                       /* push header */             \
> @@ -2187,7 +2238,7 @@ unlock_dpdk:
>      netdev_dpdk_set_etheraddr,                                \
>      netdev_dpdk_get_etheraddr,                                \
>      netdev_dpdk_get_mtu,                                      \
> -    netdev_dpdk_set_mtu,                                      \
> +    SET_MTU,                                                  \
>      netdev_dpdk_get_ifindex,                                  \
>      GET_CARRIER,                                              \
>      netdev_dpdk_get_carrier_resets,                           \
> @@ -2333,8 +2384,10 @@ static const struct netdev_class dpdk_class =
>          NULL,
>          netdev_dpdk_construct,
>          netdev_dpdk_destruct,
> +        netdev_dpdk_set_config,
>          netdev_dpdk_set_multiq,
>          netdev_dpdk_eth_send,
> +        netdev_dpdk_set_mtu,
>          netdev_dpdk_get_carrier,
>          netdev_dpdk_get_stats,
>          netdev_dpdk_get_features,
> @@ -2347,8 +2400,10 @@ static const struct netdev_class dpdk_ring_class =
>          NULL,
>          netdev_dpdk_ring_construct,
>          netdev_dpdk_destruct,
> +        netdev_dpdk_set_config,
>          netdev_dpdk_set_multiq,
>          netdev_dpdk_ring_send,
> +        netdev_dpdk_set_mtu,
>          netdev_dpdk_get_carrier,
>          netdev_dpdk_get_stats,
>          netdev_dpdk_get_features,
> @@ -2361,8 +2416,10 @@ static const struct netdev_class OVS_UNUSED 
> dpdk_vhost_cuse_class =
>          dpdk_vhost_cuse_class_init,
>          netdev_dpdk_vhost_cuse_construct,
>          netdev_dpdk_vhost_destruct,
> +        netdev_dpdk_set_config,
>          netdev_dpdk_vhost_cuse_set_multiq,
>          netdev_dpdk_vhost_send,
> +        NULL,
>          netdev_dpdk_vhost_get_carrier,
>          netdev_dpdk_vhost_get_stats,
>          NULL,
> @@ -2375,8 +2432,10 @@ static const struct netdev_class OVS_UNUSED 
> dpdk_vhost_user_class =
>          dpdk_vhost_user_class_init,
>          netdev_dpdk_vhost_user_construct,
>          netdev_dpdk_vhost_destruct,
> +        netdev_dpdk_vhost_set_config,
>          netdev_dpdk_vhost_set_multiq,
>          netdev_dpdk_vhost_send,
> +        netdev_dpdk_vhost_set_mtu,
>          netdev_dpdk_vhost_get_carrier,
>          netdev_dpdk_vhost_get_stats,
>          NULL,

Thanks,

-- 
fbl

_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to