> On Jun 3, 2014, at 6:11 PM, Ryan Wilson <wr...@nicira.com> wrote: > > When a bridge of datatype type netdev receives a packet, it copies the > packet from the NIC to a buffer in userspace. Currently, when making > an upcall, the packet is again copied to the upcall's buffer. However, > this extra copy is not necessary when the datapath exists in userspace > as the upcall can directly access the packet data. > > This patch eliminates this extra copy of the packet data in most cases. > In cases where the packet may still be used later by callers of > dp_netdev_execute_actions, making a copy of the packet data is still > necessary. > > This patch also adds a header_ field to 'struct ofpbuf' when using > DPDK. This field holds a pointer to the allocated buffer in the > rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can > now share data and free memory of a rte_mempool allocated ofpbuf. > > Signed-off-by: Ryan Wilson <wr...@nicira.com> > Acked-by: Jarno Rajahalme <jrajaha...@nicira.com> > --- > v2: Addressed Jarno's comment to use direct pointer assignment for > upcall->packet instead of ofpbuf_set(). > v3: Addressed issues with DPDK mentioned by Pravin. > v4: Fixed various bugs found in perf test > --- > lib/dpif-netdev.c | 15 +++++---------- > lib/netdev-dpdk.c | 2 +- > lib/ofpbuf.c | 6 +++++- > lib/ofpbuf.h | 20 ++++++++++++++------ > 4 files changed, 25 insertions(+), 18 deletions(-) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 91c83d6..c89ae20 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf > *packet, > miniflow_hash_5tuple(&key.flow, 0) > % dp->n_handlers, > DPIF_UC_MISS, &key.flow, NULL); > - ofpbuf_delete(packet); > } > } > > @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct > ofpbuf *packet, > if (userdata) { > buf_size += NLA_ALIGN(userdata->nla_len); > } > - buf_size += ofpbuf_size(packet); > ofpbuf_init(buf, buf_size); > > /* Put ODP flow. */ > @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, > struct ofpbuf *packet, > NLA_ALIGN(userdata->nla_len)); > } > > - ofpbuf_set_data(&upcall->packet, > - ofpbuf_put(buf, ofpbuf_data(packet), > ofpbuf_size(packet))); > - ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet)); > + upcall->packet = *packet; > > seq_change(q->seq); > > error = 0; > } else { > dp_netdev_count_packet(dp, DP_STAT_LOST); > + ofpbuf_delete(packet); > error = ENOBUFS; > } > ovs_mutex_unlock(&q->mutex); > @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, > break; > > case OVS_ACTION_ATTR_USERSPACE: { > + struct ofpbuf *userspace_packet; > const struct nlattr *userdata; > > userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); > + userspace_packet = may_steal ? packet : ofpbuf_clone(packet); > > - dp_netdev_output_userspace(aux->dp, packet, > + dp_netdev_output_userspace(aux->dp, userspace_packet, > miniflow_hash_5tuple(aux->key, 0) > % aux->dp->n_handlers, > DPIF_UC_ACTION, aux->key, > userdata); > - > - if (may_steal) { > - ofpbuf_delete(packet); > - } > break; > } > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c > index ba41d2e..e48bfde 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz) > void > free_dpdk_buf(struct ofpbuf *b) > { > - struct rte_mbuf *pkt = (struct rte_mbuf *) b; > + struct rte_mbuf *pkt = (struct rte_mbuf *) ofpbuf_header(b); >
Could use the address of the 'mbuf' member instead of a cast here? > rte_mempool_put(pkt->pool, pkt); > } > diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c > index 1f4b61d..9490a1e 100644 > --- a/lib/ofpbuf.c > +++ b/lib/ofpbuf.c > @@ -117,6 +117,9 @@ void > ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated) > { > ofpbuf_init__(b, allocated, OFPBUF_DPDK); > +#ifdef DPDK_NETDEV > + ofpbuf_set_header(b, b); > +#endif > } > > /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size' > @@ -134,8 +137,9 @@ ofpbuf_uninit(struct ofpbuf *b) > if (b) { > if (b->source == OFPBUF_MALLOC) { > free(ofpbuf_base(b)); > + } else if (b->source == OFPBUF_DPDK) { > + free_dpdk_buf(b); > } > - ovs_assert(b->source != OFPBUF_DPDK); > } > } > > diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h > index 85be899..54113d0 100644 > --- a/lib/ofpbuf.h > +++ b/lib/ofpbuf.h > @@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source { > struct ofpbuf { > #ifdef DPDK_NETDEV > struct rte_mbuf mbuf; /* DPDK mbuf */ > + struct ofpbuf *header_; /* First byte of allocated buffer header. */ Do we actually care that this is an ofpbuf pointer? Currently it is only used to free the dpdk buffer, so this would be clearer: void *dpdk_buf; > #else > void *base_; /* First byte of allocated space. */ > void *data_; /* First byte actually in use. */ > @@ -174,13 +175,10 @@ static inline void *ofpbuf_get_uninit_pointer(struct > ofpbuf *b) > static inline void ofpbuf_delete(struct ofpbuf *b) > { > if (b) { > - if (b->source == OFPBUF_DPDK) { > - free_dpdk_buf(b); > - return; > - } > - > ofpbuf_uninit(b); > - free(b); > + if (b->source != OFPBUF_DPDK) { > + free(b); > + } > } > } > > @@ -386,6 +384,16 @@ static inline void ofpbuf_set_size(struct ofpbuf *b, > uint32_t v) > * this segment. */ > } > > +static inline struct ofpbuf * ofpbuf_header(const struct ofpbuf *b) > +{ > + return b->header_; > +} > + > +static inline void ofpbuf_set_header(struct ofpbuf *b, struct ofpbuf *h) > +{ > + b->header_ = h; > +} > + This abstraction is not necessary, if the pointer only exists for DPDK. Just use the pointer directly instead. > #else > static inline void * ofpbuf_data(const struct ofpbuf *b) > { > -- > 1.7.9.5 > > _______________________________________________ > dev mailing list > dev@openvswitch.org > http://openvswitch.org/mailman/listinfo/dev _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev