Some minor comments just in case you decide to spin another version: On Mon, 1 Feb 2016 10:18:54 +0000 Mark Kavanagh <mark.b.kavan...@intel.com> wrote:
[...] > +static uint32_t > +dpdk_buf_size(struct netdev_dpdk *netdev, int frame_len) > +{ > + struct rte_eth_dev_info info; > + uint32_t buf_size; > + /* XXX: This is a workaround for DPDK v2.2, and needs to be > refactored with a > + * future DPDK release. */ > + uint32_t len = frame_len + (2 * DPDK_VLAN_TAG_LEN); > + > + if(netdev->type == DPDK_DEV_ETH) { ^ space here > + rte_eth_dev_info_get(netdev->port_id, &info); > + buf_size = (info.min_rx_bufsize == 0) ? > + NETDEV_DPDK_DEFAULT_RX_BUFSIZE : > + info.min_rx_bufsize; > + } else { > + buf_size = NETDEV_DPDK_DEFAULT_RX_BUFSIZE; > + } > + > + if(len % buf_size) { also here > + len = buf_size * ((len/buf_size) + 1); > + } > + > + return len; > +} > + > /* XXX: use dpdk malloc for entire OVS. in fact huge page should be > used > * for all other segments data, bss and text. */ > > @@ -280,26 +321,65 @@ free_dpdk_buf(struct dp_packet *p) > } > > static void > -__rte_pktmbuf_init(struct rte_mempool *mp, > - void *opaque_arg OVS_UNUSED, > - void *_m, > - unsigned i OVS_UNUSED) > +ovs_rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg) > { > - struct rte_mbuf *m = _m; > - uint32_t buf_len = mp->elt_size - sizeof(struct dp_packet); > + struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv; > + struct rte_pktmbuf_pool_private default_mbp_priv; > + uint16_t roomsz; > > RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet)); > > + /* if no structure is provided, assume no mbuf private area */ > + > + user_mbp_priv = opaque_arg; > + if (user_mbp_priv == NULL) { > + default_mbp_priv.mbuf_priv_size = 0; > + if (mp->elt_size > sizeof(struct dp_packet)) { > + roomsz = mp->elt_size - sizeof(struct dp_packet); > + } else { > + roomsz = 0; > + } > + default_mbp_priv.mbuf_data_room_size = roomsz; > + user_mbp_priv = &default_mbp_priv; > + } > + > + RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct dp_packet) + > + user_mbp_priv->mbuf_data_room_size + > + user_mbp_priv->mbuf_priv_size); > + > + mbp_priv = rte_mempool_get_priv(mp); > + memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv)); > +} > + > +/* Initialise some fields in the mbuf structure that are not > modified by the > + * user once created (origin pool, buffer start address, etc.*/ > +static void > +__ovs_rte_pktmbuf_init(struct rte_mempool *mp, > + void *opaque_arg OVS_UNUSED, > + void *_m, > + unsigned i OVS_UNUSED) > +{ > + struct rte_mbuf *m = _m; > + uint32_t buf_size, buf_len, priv_size; > + > + priv_size = rte_pktmbuf_priv_size(mp); > + buf_size = sizeof(struct dp_packet) + priv_size; > + buf_len = rte_pktmbuf_data_room_size(mp); > + > + RTE_MBUF_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == > priv_size); > + RTE_MBUF_ASSERT(mp->elt_size >= buf_size); > + RTE_MBUF_ASSERT(buf_len <= UINT16_MAX); > + > memset(m, 0, mp->elt_size); > > - /* start of buffer is just after mbuf structure */ > - m->buf_addr = (char *)m + sizeof(struct dp_packet); > - m->buf_physaddr = rte_mempool_virt2phy(mp, m) + > - sizeof(struct dp_packet); > + /* start of buffer is after dp_packet structure and priv data */ > + m->priv_size = priv_size; > + m->buf_addr = (char *)m + buf_size; > + m->buf_physaddr = rte_mempool_virt2phy(mp, m) + buf_size; > m->buf_len = (uint16_t)buf_len; > > /* keep some headroom between start of buffer and data */ > - m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, m->buf_len); > + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, > (uint16_t)m->buf_len); > /* init some constant fields */ > m->pool = mp; > @@ -315,7 +395,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp, > { > struct rte_mbuf *m = _m; > > - __rte_pktmbuf_init(mp, opaque_arg, _m, i); > + __ovs_rte_pktmbuf_init(mp, opaque_arg, m, i); > > dp_packet_init_dpdk((struct dp_packet *) m, m->buf_len); > } > @@ -326,6 +406,7 @@ dpdk_mp_get(int socket_id, int mtu) > OVS_REQUIRES(dpdk_mutex) struct dpdk_mp *dmp = NULL; > char mp_name[RTE_MEMPOOL_NAMESIZE]; > unsigned mp_size; > + struct rte_pktmbuf_pool_private mbp_priv; > > LIST_FOR_EACH (dmp, list_node, &dpdk_mp_list) { > if (dmp->socket_id == socket_id && dmp->mtu == mtu) { > @@ -338,6 +419,8 @@ dpdk_mp_get(int socket_id, int mtu) > OVS_REQUIRES(dpdk_mutex) dmp->socket_id = socket_id; > dmp->mtu = mtu; > dmp->refcount = 1; > + mbp_priv.mbuf_data_room_size = MTU_TO_FRAME_LEN(mtu) + > RTE_PKTMBUF_HEADROOM; > + mbp_priv.mbuf_priv_size = 0; > > mp_size = MAX_NB_MBUF; > do { > @@ -346,10 +429,10 @@ dpdk_mp_get(int socket_id, int mtu) > OVS_REQUIRES(dpdk_mutex) return NULL; > } > > - dmp->mp = rte_mempool_create(mp_name, mp_size, > MBUF_SIZE(mtu), > + dmp->mp = rte_mempool_create(mp_name, mp_size, > MBUF_SEGMENT_SIZE(mtu), MP_CACHE_SZ, > sizeof(struct > rte_pktmbuf_pool_private), > - rte_pktmbuf_pool_init, NULL, > + ovs_rte_pktmbuf_pool_init, > &mbp_priv, ovs_rte_pktmbuf_init, NULL, > socket_id, 0); > } while (!dmp->mp && rte_errno == ENOMEM && (mp_size /= 2) >= > MIN_NB_MBUF); @@ -433,6 +516,7 @@ dpdk_eth_dev_queue_setup(struct > netdev_dpdk *dev, int n_rxq, int n_txq) { > int diag = 0; > int i; > + struct rte_eth_conf conf = port_conf; > > /* A device may report more queues than it makes available (this > has > * been observed for Intel xl710, which reserves some of them for > @@ -444,7 +528,12 @@ dpdk_eth_dev_queue_setup(struct netdev_dpdk > *dev, int n_rxq, int n_txq) VLOG_INFO("Retrying setup with (rxq:%d > txq:%d)", n_rxq, n_txq); } > > - diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, > &port_conf); > + if(OVS_UNLIKELY(dev->mtu > ETHER_MTU)) { space needed > + conf.rxmode.jumbo_frame = > NETDEV_DPDK_JUMBO_FRAME_ENABLED; > + conf.rxmode.max_rx_pkt_len = MTU_TO_FRAME_LEN(dev->mtu); > + } > + > + diag = rte_eth_dev_configure(dev->port_id, n_rxq, n_txq, > &conf); if (diag) { > break; > } > @@ -586,6 +675,7 @@ netdev_dpdk_init(struct netdev *netdev_, unsigned > int port_no, struct netdev_dpdk *netdev = netdev_dpdk_cast(netdev_); > int sid; > int err = 0; > + uint32_t buf_size; > > ovs_mutex_init(&netdev->mutex); > ovs_mutex_lock(&netdev->mutex); > @@ -605,10 +695,16 @@ netdev_dpdk_init(struct netdev *netdev_, > unsigned int port_no, netdev->port_id = port_no; > netdev->type = type; > netdev->flags = 0; > + > + /* Initialize port's MTU and frame len to the default Ethernet > values. > + * Larger, user-specified (jumbo) frame buffers are accommodated > in > + * netdev_dpdk_set_config. > + */ > + netdev->max_packet_len = ETHER_MAX_LEN; > netdev->mtu = ETHER_MTU; > - netdev->max_packet_len = MTU_TO_MAX_LEN(netdev->mtu); > > - netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, netdev->mtu); > + buf_size = dpdk_buf_size(netdev, ETHER_MAX_LEN); > + netdev->dpdk_mp = dpdk_mp_get(netdev->socket_id, > FRAME_LEN_TO_MTU(buf_size)); if (!netdev->dpdk_mp) { > err = ENOMEM; > goto unlock; > @@ -651,6 +747,27 @@ dpdk_dev_parse_name(const char dev_name[], const > char prefix[], return 0; > } > > +static void > +dpdk_dev_parse_mtu(const struct smap *args, int *mtu) > +{ > + const char *mtu_str = smap_get(args, "dpdk-mtu"); > + char *end_ptr = NULL; > + int local_mtu; > + > + if(mtu_str) { > + local_mtu = strtoul(mtu_str, &end_ptr, 0); > + } > + if(!mtu_str || local_mtu < ETHER_MTU || > + local_mtu > > FRAME_LEN_TO_MTU(NETDEV_DPDK_MAX_FRAME_LEN) || > + *end_ptr != '\0') { This needs to align with !mtu_str Thanks, -- fbl _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev