Can you make subject shorter? On Wed, Jun 4, 2014 at 11:00 AM, Ryan Wilson <wr...@nicira.com> wrote: > When a bridge of datatype type netdev receives a packet, it copies the > packet from the NIC to a buffer in userspace. Currently, when making > an upcall, the packet is again copied to the upcall's buffer. However, > this extra copy is not necessary when the datapath exists in userspace > as the upcall can directly access the packet data. > > This patch eliminates this extra copy of the packet data in most cases. > In cases where the packet may still be used later by callers of > dp_netdev_execute_actions, making a copy of the packet data is still > necessary. > > This patch also adds a dpdk_buf field to 'struct ofpbuf' when using > DPDK. This field holds a pointer to the allocated DPDK buffer in the > rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can > now share data and free memory of a rte_mempool allocated ofpbuf. > > Signed-off-by: Ryan Wilson <wr...@nicira.com> > Acked-by: Jarno Rajahalme <jrajaha...@nicira.com>
Otherwise looks good. Acked-by: Pravin B Shelar <pshe...@nicira.com> > --- > v2: Addressed Jarno's comment to use direct pointer assignment for > upcall->packet instead of ofpbuf_set(). > v3: Addressed issues with DPDK mentioned by Pravin. > v4: Fixed various bugs found in perf test > v5: Addressed Jarno's comments with regards to DPDK buf attribute. > v6: Fix commit message > --- > lib/dpif-netdev.c | 15 +++++---------- > lib/netdev-dpdk.c | 2 +- > lib/ofpbuf.c | 9 ++++++++- > lib/ofpbuf.h | 3 +++ > 4 files changed, 17 insertions(+), 12 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 91c83d6..c89ae20 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf > *packet, > miniflow_hash_5tuple(&key.flow, 0) > % dp->n_handlers, > DPIF_UC_MISS, &key.flow, NULL); > - ofpbuf_delete(packet); > } > } > > @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct > ofpbuf *packet, > if (userdata) { > buf_size += NLA_ALIGN(userdata->nla_len); > } > - buf_size += ofpbuf_size(packet); > ofpbuf_init(buf, buf_size); > > /* Put ODP flow. */ > @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, > struct ofpbuf *packet, > NLA_ALIGN(userdata->nla_len)); > } > > - ofpbuf_set_data(&upcall->packet, > - ofpbuf_put(buf, ofpbuf_data(packet), > ofpbuf_size(packet))); > - ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet)); > + upcall->packet = *packet; > > seq_change(q->seq); > > error = 0; > } else { > dp_netdev_count_packet(dp, DP_STAT_LOST); > + ofpbuf_delete(packet); > error = ENOBUFS; > } > ovs_mutex_unlock(&q->mutex); > @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, > break; > > case OVS_ACTION_ATTR_USERSPACE: { > + struct ofpbuf *userspace_packet; > const struct nlattr *userdata; > > userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); > + userspace_packet = may_steal ? packet : ofpbuf_clone(packet); > > - dp_netdev_output_userspace(aux->dp, packet, > + dp_netdev_output_userspace(aux->dp, userspace_packet, > miniflow_hash_5tuple(aux->key, 0) > % aux->dp->n_handlers, > DPIF_UC_ACTION, aux->key, > userdata); > - > - if (may_steal) { > - ofpbuf_delete(packet); > - } > break; > } > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index ba41d2e..9370e5f 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz) > void > free_dpdk_buf(struct ofpbuf *b) > { > - struct rte_mbuf *pkt = (struct rte_mbuf *) b; > + struct rte_mbuf *pkt = (struct rte_mbuf *) b->dpdk_buf; > > rte_mempool_put(pkt->pool, pkt); > } > diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c > index 1f4b61d..fb59ea5 100644 > --- a/lib/ofpbuf.c > +++ b/lib/ofpbuf.c > @@ -117,6 +117,9 @@ void > ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated) > { > ofpbuf_init__(b, allocated, OFPBUF_DPDK); > +#ifdef DPDK_NETDEV > + b->dpdk_buf = b; > +#endif > } > > /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size' > @@ -134,8 +137,12 @@ ofpbuf_uninit(struct ofpbuf *b) > if (b) { > if (b->source == OFPBUF_MALLOC) { > free(ofpbuf_base(b)); > + } else if (b->source == OFPBUF_DPDK) { > +#ifdef DPDK_NETDEV > + ovs_assert(b != b->dpdk_buf); > +#endif > + free_dpdk_buf(b); > } > - ovs_assert(b->source != OFPBUF_DPDK); > } > } > > diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h > index 85be899..13a3e9d 100644 > --- a/lib/ofpbuf.h > +++ b/lib/ofpbuf.h > @@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source { > struct ofpbuf { > #ifdef DPDK_NETDEV > struct rte_mbuf mbuf; /* DPDK mbuf */ > + void *dpdk_buf; /* First byte of allocated DPDK buffer. */ > #else > void *base_; /* First byte of allocated space. */ > void *data_; /* First byte actually in use. */ > @@ -354,6 +355,8 @@ static inline const void *ofpbuf_get_icmp_payload(const > struct ofpbuf *b) > } > > #ifdef DPDK_NETDEV > +BUILD_ASSERT_DECL(offsetof(struct ofpbuf, mbuf) == 0); > + > static inline void * ofpbuf_data(const struct ofpbuf *b) > { > return b->mbuf.pkt.data; > -- > 1.7.9.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev