On Tue, 16 Feb 2016 11:31:49 +0000 Mark Kavanagh <mark.b.kavan...@intel.com> wrote:
> Add support for Jumbo Frames to DPDK-enabled port types, > using single-segment-mbufs. > > Using this approach, the amount of memory allocated for each mbuf > to store frame data is increased to a value greater than 1518B > (typical Ethernet maximum frame length). The increased space > available in the mbuf means that an entire Jumbo Frame can be carried > in a single mbuf, as opposed to partitioning it across multiple mbuf > segments. > > The amount of space allocated to each mbuf to hold frame data is > defined dynamically by the user when adding a DPDK port to a bridge. > If an MTU value is not supplied, or the user-supplied value is invalid, > the MTU for the port defaults to standard Ethernet MTU (i.e. 1500B). > > Signed-off-by: Mark Kavanagh <mark.b.kavan...@intel.com> > --- > INSTALL.DPDK.md | 1 - > lib/netdev-dpdk.c | 259 > +++++++++++++++++++++++++++++++++--------------------- > 2 files changed, 159 insertions(+), 101 deletions(-) > > diff --git a/INSTALL.DPDK.md b/INSTALL.DPDK.md > index d892788..722fb9e 100644 > --- a/INSTALL.DPDK.md > +++ b/INSTALL.DPDK.md > @@ -881,7 +881,6 @@ determines how many queues can be used by the guest. > Restrictions: > ------------- > > - - Work with 1500 MTU, needs few changes in DPDK lib to fix this issue. > - Currently DPDK port does not make use any offload functionality. > - DPDK-vHost support works with 1G huge pages. > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index 5bcd36d..93a0930 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -77,6 +77,7 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, > 20); > + sizeof(struct dp_packet) \ > + RTE_PKTMBUF_HEADROOM) > #define NETDEV_DPDK_MBUF_ALIGN 1024 > +#define NETDEV_DPDK_MAX_FRAME_LEN 13312 > > /* Max and min number of packets in the mempool. OVS tries to allocate a > * mempool with MAX_NB_MBUF: if this fails (because the system doesn't have > @@ -294,34 +295,6 @@ free_dpdk_buf(struct dp_packet *p) > } > > static void > -__rte_pktmbuf_init(struct rte_mempool *mp, > - void *opaque_arg OVS_UNUSED, > - void *_m, > - unsigned i OVS_UNUSED) > -{ > - struct rte_mbuf *m = _m; > - uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet); > - > - RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet)); > - > - memset(m, 0, mp->elt_size); > - > - /* start of buffer is just after mbuf structure */ > - m->buf_addr = (char *)m + sizeof(struct dp_packet); > - m->buf_physaddr = rte_mempool_virt2phy(mp, m) + > - sizeof(struct dp_packet); > - m->buf_len = (uint16_t)buf_len; > - > - /* keep some headroom between start of buffer and data */ > - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len); > - > - /* init some constant fields */ > - m->pool = mp; > - m->nb_segs = 1; > - m->port = 0xff; > -} > - > -static void > ovs_rte_pktmbuf_init(struct rte_mempool *mp, > void *opaque_arg OVS_UNUSED, > void *_m, > @@ -450,6 +423,7 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int > n_rxq, int n_txq) > { > int diag = 0; > int i; > + struct rte_eth_conf conf = port_conf; > > /* A device may report more queues than it makes available (this has > * been observed for Intel xl710, which reserves some of them for > @@ -461,7 +435,15 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk *dev, int > n_rxq, int n_txq) > VLOG_INFO("Retrying setup with (rxq:%d txq:%d)", n_rxq, n_txq); > } > > - diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &port_conf); > + if (dev->mtu > ETHER_MTU) { > + conf.rxmode.jumbo_frame = 1; > + conf.rxmode.max_rx_pkt_len = dev->max_packet_len; > + } else { > + conf.rxmode.jumbo_frame = 0; > + conf.rxmode.max_rx_pkt_len = 0; > + } > + > + diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, &conf); > if (diag) { > break; > } > @@ -602,8 +584,6 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no, > { > struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > int sid; > - int err = 0; > - uint32_t buf_size; > > ovs_mutex_init(&netdev->mutex); > ovs_mutex_lock(&netdev->mutex); > @@ -623,15 +603,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no, > netdev->port_id = port_no; > netdev->type = type; > netdev->flags = 0; > - netdev->mtu = ETHER_MTU; > - netdev->max_packet_len = ETHER_MAX_LEN; > - > - buf_size = dpdk_buf_size(netdev->mtu); > - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, > FRAME_LEN_TO_MTU(buf_size)); > - if (!netdev->dpdk_mp) { > - err = ENOMEM; > - goto unlock; > - } > + netdev->mtu = 0; This zero is special because it means we can set MTU otherwise the mtu_request is just ignored, so maybe we could define MTU_NOT_SET or something like that. > netdev_->n_txq = NR_QUEUE; > netdev_->n_rxq = NR_QUEUE; > @@ -640,20 +612,12 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned int > port_no, > > if (type == DPDK_DEV_ETH) { > netdev_dpdk_alloc_txq(netdev, NR_QUEUE); > - err = dpdk_eth_dev_init(netdev); > - if (err) { > - goto unlock; > - } > } > > list_push_back(&dpdk_list, &netdev->list_node); > > -unlock: > - if (err) { > - rte_free(netdev->tx_q); > - } > ovs_mutex_unlock(&netdev->mutex); > - return err; > + return 0; > } > > static int > @@ -671,6 +635,27 @@ dpdk_dev_parse_name(const char dev_name[], const char > prefix[], > return 0; > } > > +static void > +dpdk_dev_parse_mtu(const struct smap *args, int *mtu) > +{ > + const char *mtu_str = smap_get(args, "mtu_request"); > + char *end_ptr = NULL; > + int local_mtu; > + > + if (mtu_str) { > + local_mtu = strtoul(mtu_str, &end_ptr, 0); > + } > + if (!mtu_str || local_mtu < ETHER_MTU || > + local_mtu > FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN) || > + *end_ptr != '\0') { > + local_mtu = ETHER_MTU; > + VLOG_WARN("Invalid or missing mtu_request parameter - defaulting to > %d.\n", > + local_mtu); This prints a warning message by default. It should only tell if the MTU is invalid and perhaps if the mtu different than the default as an informative message. > + } > + > + *mtu = local_mtu; > +} > + > static int > vhost_construct_helper(struct netdev *netdev_) OVS_REQUIRES(dpdk_mutex) > { > @@ -801,15 +786,72 @@ netdev_dpdk_get_config(const struct netdev *netdev, > struct smap *args) > smap_add_format(args, "configured_rx_queues", "%d", netdev->n_rxq); > smap_add_format(args, "requested_tx_queues", "%d", netdev->n_txq); > smap_add_format(args, "configured_tx_queues", "%d", dev->real_n_txq); > + smap_add_format(args, "mtu", "%d", dev->mtu); > ovs_mutex_unlock(&dev->mutex); > > return 0; > } > > +/* Set the mtu of DPDK_DEV_ETH ports */ > +static int > +netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + int err, dpdk_mtu; > + uint32_t buf_size; > + struct dpdk_mp *mp; > + > + ovs_mutex_lock(&dpdk_mutex); > + ovs_mutex_lock(&dev->mutex); > + if (dev->mtu == mtu) { > + err = 0; > + goto out; > + } > + > + buf_size = dpdk_buf_size(mtu); > + dpdk_mtu = FRAME_LEN_TO_MTU(buf_size); > + > + mp = dpdk_mp_get(dev->socket_id, dpdk_mtu); > + if (!mp) { > + err = ENOMEM; > + goto out; > + } > + > + rte_eth_dev_stop(dev->port_id); > + > + dev->dpdk_mp = mp; > + dev->mtu = mtu; > + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > + > + err = dpdk_eth_dev_init(dev); > + if (err) { > + VLOG_WARN("Unable to set MTU '%d' for '%s'; falling back to default " > + "MTU '%d'\n", mtu, dev->up.name, ETHER_MTU); > + dpdk_mp_put(mp); > + dev->mtu = ETHER_MTU; > + mp = dpdk_mp_get(dev->socket_id, dev->mtu); > + if (!mp) { > + err = ENOMEM; > + goto out; > + } > + dev->dpdk_mp = mp; > + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > + dpdk_eth_dev_init(dev); > + goto out; > + } else { > + netdev_change_seq_changed(netdev); > + } > +out: > + ovs_mutex_unlock(&dev->mutex); > + ovs_mutex_unlock(&dpdk_mutex); > + return err; > +} > + > static int > netdev_dpdk_set_config(struct netdev *netdev, const struct smap *args) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + int mtu; > > ovs_mutex_lock(&dev->mutex); > netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", > @@ -817,6 +859,10 @@ netdev_dpdk_set_config(struct netdev *netdev, const > struct smap *args) > netdev_change_seq_changed(netdev); > ovs_mutex_unlock(&dev->mutex); > > + if (!dev->mtu) { > + dpdk_dev_parse_mtu(args, &mtu); > + return netdev_dpdk_set_mtu(netdev, mtu); > + } Can we actually check if dev->mtu is different from mtu_request and then print a warning if so? Because the requested config will be ignored. > return 0; > } > > @@ -1435,53 +1481,6 @@ netdev_dpdk_get_mtu(const struct netdev *netdev, int > *mtup) > } > > static int > -netdev_dpdk_set_mtu(const struct netdev *netdev, int mtu) > -{ > - struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > - int old_mtu, err; > - struct dpdk_mp *old_mp; > - struct dpdk_mp *mp; > - > - ovs_mutex_lock(&dpdk_mutex); > - ovs_mutex_lock(&dev->mutex); > - if (dev->mtu == mtu) { > - err = 0; > - goto out; > - } > - > - mp = dpdk_mp_get(dev->socket_id, dev->mtu); > - if (!mp) { > - err = ENOMEM; > - goto out; > - } > - > - rte_eth_dev_stop(dev->port_id); > - > - old_mtu = dev->mtu; > - old_mp = dev->dpdk_mp; > - dev->dpdk_mp = mp; > - dev->mtu = mtu; > - dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > - > - err = dpdk_eth_dev_init(dev); > - if (err) { > - dpdk_mp_put(mp); > - dev->mtu = old_mtu; > - dev->dpdk_mp = old_mp; > - dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > - dpdk_eth_dev_init(dev); > - goto out; > - } > - > - dpdk_mp_put(old_mp); > - netdev_change_seq_changed(netdev); > -out: > - ovs_mutex_unlock(&dev->mutex); > - ovs_mutex_unlock(&dpdk_mutex); > - return err; > -} > - > -static int > netdev_dpdk_get_carrier(const struct netdev *netdev_, bool *carrier); > > static int > @@ -2024,6 +2023,57 @@ dpdk_vhost_user_class_init(void) > return 0; > } > > +/* Set the mtu of DPDK_DEV_VHOST ports */ > +static int > +netdev_dpdk_vhost_set_mtu(const struct netdev *netdev, int mtu) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + int err = 0; > + struct dpdk_mp *mp; > + > + ovs_mutex_lock(&dpdk_mutex); > + ovs_mutex_lock(&dev->mutex); > + if (dev->mtu == mtu) { > + err = 0; > + goto out; > + } > + > + mp = dpdk_mp_get(dev->socket_id, mtu); > + if (!mp) { > + err = ENOMEM; > + goto out; > + } > + > + dev->dpdk_mp = mp; > + dev->mtu = mtu; > + dev->max_packet_len = MTU_TO_FRAME_LEN(dev->mtu); > + > + netdev_change_seq_changed(netdev); > +out: > + ovs_mutex_unlock(&dev->mutex); > + ovs_mutex_unlock(&dpdk_mutex); > + return err; > +} > + > +static int > +netdev_dpdk_vhost_set_config(struct netdev *netdev, const struct smap *args) > +{ > + struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > + int mtu; > + > + ovs_mutex_lock(&dev->mutex); > + netdev->requested_n_rxq = MAX(smap_get_int(args, "n_rxq", > + netdev->requested_n_rxq), 1); > + netdev_change_seq_changed(netdev); > + ovs_mutex_unlock(&dev->mutex); > + > + if (!dev->mtu) { > + dpdk_dev_parse_mtu(args, &mtu); > + return netdev_dpdk_vhost_set_mtu(netdev, mtu); > + } > + return 0; > +} > + > static void > dpdk_common_init(void) > { > @@ -2160,8 +2210,9 @@ unlock_dpdk: > return err; > } > > -#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, MULTIQ, SEND, \ > - GET_CARRIER, GET_STATS, GET_FEATURES, GET_STATUS, RXQ_RECV) \ > +#define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT, SET_CONFIG, \ > + MULTIQ, SEND, SET_MTU, GET_CARRIER, GET_STATS, GET_FEATURES, \ > + GET_STATUS, RXQ_RECV) \ > { \ > NAME, \ > INIT, /* init */ \ > @@ -2173,7 +2224,7 @@ unlock_dpdk: > DESTRUCT, \ > netdev_dpdk_dealloc, \ > netdev_dpdk_get_config, \ > - netdev_dpdk_set_config, \ > + SET_CONFIG , \ > NULL, /* get_tunnel_config */ \ > NULL, /* build header */ \ > NULL, /* push header */ \ > @@ -2187,7 +2238,7 @@ unlock_dpdk: > netdev_dpdk_set_etheraddr, \ > netdev_dpdk_get_etheraddr, \ > netdev_dpdk_get_mtu, \ > - netdev_dpdk_set_mtu, \ > + SET_MTU, \ > netdev_dpdk_get_ifindex, \ > GET_CARRIER, \ > netdev_dpdk_get_carrier_resets, \ > @@ -2333,8 +2384,10 @@ static const struct netdev_class dpdk_class = > NULL, > netdev_dpdk_construct, > netdev_dpdk_destruct, > + netdev_dpdk_set_config, > netdev_dpdk_set_multiq, > netdev_dpdk_eth_send, > + netdev_dpdk_set_mtu, > netdev_dpdk_get_carrier, > netdev_dpdk_get_stats, > netdev_dpdk_get_features, > @@ -2347,8 +2400,10 @@ static const struct netdev_class dpdk_ring_class = > NULL, > netdev_dpdk_ring_construct, > netdev_dpdk_destruct, > + netdev_dpdk_set_config, > netdev_dpdk_set_multiq, > netdev_dpdk_ring_send, > + netdev_dpdk_set_mtu, > netdev_dpdk_get_carrier, > netdev_dpdk_get_stats, > netdev_dpdk_get_features, > @@ -2361,8 +2416,10 @@ static const struct netdev_class OVS_UNUSED > dpdk_vhost_cuse_class = > dpdk_vhost_cuse_class_init, > netdev_dpdk_vhost_cuse_construct, > netdev_dpdk_vhost_destruct, > + netdev_dpdk_set_config, > netdev_dpdk_vhost_cuse_set_multiq, > netdev_dpdk_vhost_send, > + NULL, > netdev_dpdk_vhost_get_carrier, > netdev_dpdk_vhost_get_stats, > NULL, > @@ -2375,8 +2432,10 @@ static const struct netdev_class OVS_UNUSED > dpdk_vhost_user_class = > dpdk_vhost_user_class_init, > netdev_dpdk_vhost_user_construct, > netdev_dpdk_vhost_destruct, > + netdev_dpdk_vhost_set_config, > netdev_dpdk_vhost_set_multiq, > netdev_dpdk_vhost_send, > + netdev_dpdk_vhost_set_mtu, > netdev_dpdk_vhost_get_carrier, > netdev_dpdk_vhost_get_stats, > NULL, Thanks, -- fbl _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev