When a bridge of datatype type netdev receives a packet, it copies the packet from the NIC to a buffer in userspace. Currently, when making an upcall, the packet is again copied to the upcall's buffer. However, this extra copy is not necessary when the datapath exists in userspace as the upcall can directly access the packet data.
This patch eliminates this extra copy of the packet data in most cases. In cases where the packet may still be used later by callers of dp_netdev_execute_actions, making a copy of the packet data is still necessary. This patch also adds a header_ field to 'struct ofpbuf' when using DPDK. This field holds a pointer to the allocated buffer in the rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can now share data and free memory of a rte_mempool allocated ofpbuf. Signed-off-by: Ryan Wilson <wr...@nicira.com> Acked-by: Jarno Rajahalme <jrajaha...@nicira.com> --- v2: Addressed Jarno's comment to use direct pointer assignment for upcall->packet instead of ofpbuf_set(). v3: Addressed issues with DPDK mentioned by Pravin. v4: Fixed various bugs found in perf test --- lib/dpif-netdev.c | 15 +++++---------- lib/netdev-dpdk.c | 2 +- lib/ofpbuf.c | 6 +++++- lib/ofpbuf.h | 20 ++++++++++++++------ 4 files changed, 25 insertions(+), 18 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index 91c83d6..c89ae20 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet, miniflow_hash_5tuple(&key.flow, 0) % dp->n_handlers, DPIF_UC_MISS, &key.flow, NULL); - ofpbuf_delete(packet); } } @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, if (userdata) { buf_size += NLA_ALIGN(userdata->nla_len); } - buf_size += ofpbuf_size(packet); ofpbuf_init(buf, buf_size); /* Put ODP flow. */ @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet, NLA_ALIGN(userdata->nla_len)); } - ofpbuf_set_data(&upcall->packet, - ofpbuf_put(buf, ofpbuf_data(packet), ofpbuf_size(packet))); - ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet)); + upcall->packet = *packet; seq_change(q->seq); error = 0; } else { dp_netdev_count_packet(dp, DP_STAT_LOST); + ofpbuf_delete(packet); error = ENOBUFS; } ovs_mutex_unlock(&q->mutex); @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet, break; case OVS_ACTION_ATTR_USERSPACE: { + struct ofpbuf *userspace_packet; const struct nlattr *userdata; userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA); + userspace_packet = may_steal ? packet : ofpbuf_clone(packet); - dp_netdev_output_userspace(aux->dp, packet, + dp_netdev_output_userspace(aux->dp, userspace_packet, miniflow_hash_5tuple(aux->key, 0) % aux->dp->n_handlers, DPIF_UC_ACTION, aux->key, userdata); - - if (may_steal) { - ofpbuf_delete(packet); - } break; } diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index ba41d2e..e48bfde 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz) void free_dpdk_buf(struct ofpbuf *b) { - struct rte_mbuf *pkt = (struct rte_mbuf *) b; + struct rte_mbuf *pkt = (struct rte_mbuf *) ofpbuf_header(b); rte_mempool_put(pkt->pool, pkt); } diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c index 1f4b61d..9490a1e 100644 --- a/lib/ofpbuf.c +++ b/lib/ofpbuf.c @@ -117,6 +117,9 @@ void ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated) { ofpbuf_init__(b, allocated, OFPBUF_DPDK); +#ifdef DPDK_NETDEV + ofpbuf_set_header(b, b); +#endif } /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size' @@ -134,8 +137,9 @@ ofpbuf_uninit(struct ofpbuf *b) if (b) { if (b->source == OFPBUF_MALLOC) { free(ofpbuf_base(b)); + } else if (b->source == OFPBUF_DPDK) { + free_dpdk_buf(b); } - ovs_assert(b->source != OFPBUF_DPDK); } } diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h index 85be899..54113d0 100644 --- a/lib/ofpbuf.h +++ b/lib/ofpbuf.h @@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source { struct ofpbuf { #ifdef DPDK_NETDEV struct rte_mbuf mbuf; /* DPDK mbuf */ + struct ofpbuf *header_; /* First byte of allocated buffer header. */ #else void *base_; /* First byte of allocated space. */ void *data_; /* First byte actually in use. */ @@ -174,13 +175,10 @@ static inline void *ofpbuf_get_uninit_pointer(struct ofpbuf *b) static inline void ofpbuf_delete(struct ofpbuf *b) { if (b) { - if (b->source == OFPBUF_DPDK) { - free_dpdk_buf(b); - return; - } - ofpbuf_uninit(b); - free(b); + if (b->source != OFPBUF_DPDK) { + free(b); + } } } @@ -386,6 +384,16 @@ static inline void ofpbuf_set_size(struct ofpbuf *b, uint32_t v) * this segment. */ } +static inline struct ofpbuf * ofpbuf_header(const struct ofpbuf *b) +{ + return b->header_; +} + +static inline void ofpbuf_set_header(struct ofpbuf *b, struct ofpbuf *h) +{ + b->header_ = h; +} + #else static inline void * ofpbuf_data(const struct ofpbuf *b) { -- 1.7.9.5 _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev