> On Jun 3, 2014, at 6:11 PM, Ryan Wilson <wr...@nicira.com> wrote:
> 
> When a bridge of datatype type netdev receives a packet, it copies the
> packet from the NIC to a buffer in userspace. Currently, when making
> an upcall, the packet is again copied to the upcall's buffer. However,
> this extra copy is not necessary when the datapath exists in userspace
> as the upcall can directly access the packet data.
> 
> This patch eliminates this extra copy of the packet data in most cases.
> In cases where the packet may still be used later by callers of
> dp_netdev_execute_actions, making a copy of the packet data is still
> necessary.
> 
> This patch also adds a header_ field to 'struct ofpbuf' when using
> DPDK. This field holds a pointer to the allocated buffer in the
> rte_mempool. Thus, an upcall packet ofpbuf allocated on the stack can
> now share data and free memory of a rte_mempool allocated ofpbuf.
> 
> Signed-off-by: Ryan Wilson <wr...@nicira.com>
> Acked-by: Jarno Rajahalme <jrajaha...@nicira.com>
> ---
> v2: Addressed Jarno's comment to use direct pointer assignment for
> upcall->packet instead of ofpbuf_set().
> v3: Addressed issues with DPDK mentioned by Pravin.
> v4: Fixed various bugs found in perf test
> ---
> lib/dpif-netdev.c |   15 +++++----------
> lib/netdev-dpdk.c |    2 +-
> lib/ofpbuf.c      |    6 +++++-
> lib/ofpbuf.h      |   20 ++++++++++++++------
> 4 files changed, 25 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 91c83d6..c89ae20 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -2024,7 +2024,6 @@ dp_netdev_input(struct dp_netdev *dp, struct ofpbuf 
> *packet,
>                                    miniflow_hash_5tuple(&key.flow, 0)
>                                    % dp->n_handlers,
>                                    DPIF_UC_MISS, &key.flow, NULL);
> -        ofpbuf_delete(packet);
>     }
> }
> 
> @@ -2063,7 +2062,6 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct 
> ofpbuf *packet,
>         if (userdata) {
>             buf_size += NLA_ALIGN(userdata->nla_len);
>         }
> -        buf_size += ofpbuf_size(packet);
>         ofpbuf_init(buf, buf_size);
> 
>         /* Put ODP flow. */
> @@ -2078,15 +2076,14 @@ dp_netdev_output_userspace(struct dp_netdev *dp, 
> struct ofpbuf *packet,
>                                           NLA_ALIGN(userdata->nla_len));
>         }
> 
> -        ofpbuf_set_data(&upcall->packet,
> -                        ofpbuf_put(buf, ofpbuf_data(packet), 
> ofpbuf_size(packet)));
> -        ofpbuf_set_size(&upcall->packet, ofpbuf_size(packet));
> +        upcall->packet = *packet;
> 
>         seq_change(q->seq);
> 
>         error = 0;
>     } else {
>         dp_netdev_count_packet(dp, DP_STAT_LOST);
> +        ofpbuf_delete(packet);
>         error = ENOBUFS;
>     }
>     ovs_mutex_unlock(&q->mutex);
> @@ -2120,19 +2117,17 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
>         break;
> 
>     case OVS_ACTION_ATTR_USERSPACE: {
> +        struct ofpbuf *userspace_packet;
>         const struct nlattr *userdata;
> 
>         userdata = nl_attr_find_nested(a, OVS_USERSPACE_ATTR_USERDATA);
> +        userspace_packet = may_steal ? packet : ofpbuf_clone(packet);
> 
> -        dp_netdev_output_userspace(aux->dp, packet,
> +        dp_netdev_output_userspace(aux->dp, userspace_packet,
>                                    miniflow_hash_5tuple(aux->key, 0)
>                                        % aux->dp->n_handlers,
>                                    DPIF_UC_ACTION, aux->key,
>                                    userdata);
> -
> -        if (may_steal) {
> -            ofpbuf_delete(packet);
> -        }
>         break;
>     }
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index ba41d2e..e48bfde 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -205,7 +205,7 @@ dpdk_rte_mzalloc(size_t sz)
> void
> free_dpdk_buf(struct ofpbuf *b)
> {
> -    struct rte_mbuf *pkt = (struct rte_mbuf *) b;
> +    struct rte_mbuf *pkt = (struct rte_mbuf *) ofpbuf_header(b);
> 

Could use the address of the 'mbuf' member instead of a cast here?

>     rte_mempool_put(pkt->pool, pkt);
> }
> diff --git a/lib/ofpbuf.c b/lib/ofpbuf.c
> index 1f4b61d..9490a1e 100644
> --- a/lib/ofpbuf.c
> +++ b/lib/ofpbuf.c
> @@ -117,6 +117,9 @@ void
> ofpbuf_init_dpdk(struct ofpbuf *b, size_t allocated)
> {
>     ofpbuf_init__(b, allocated, OFPBUF_DPDK);
> +#ifdef DPDK_NETDEV
> +    ofpbuf_set_header(b, b);
> +#endif
> }
> 
> /* Initializes 'b' as an empty ofpbuf with an initial capacity of 'size'
> @@ -134,8 +137,9 @@ ofpbuf_uninit(struct ofpbuf *b)
>     if (b) {
>         if (b->source == OFPBUF_MALLOC) {
>             free(ofpbuf_base(b));
> +        } else if (b->source == OFPBUF_DPDK) {
> +            free_dpdk_buf(b);
>         }
> -        ovs_assert(b->source != OFPBUF_DPDK);
>     }
> }
> 
> diff --git a/lib/ofpbuf.h b/lib/ofpbuf.h
> index 85be899..54113d0 100644
> --- a/lib/ofpbuf.h
> +++ b/lib/ofpbuf.h
> @@ -59,6 +59,7 @@ enum OVS_PACKED_ENUM ofpbuf_source {
> struct ofpbuf {
> #ifdef DPDK_NETDEV
>     struct rte_mbuf mbuf;       /* DPDK mbuf */
> +    struct ofpbuf *header_;     /* First byte of allocated buffer header. */

Do we actually care that this is an ofpbuf pointer? Currently it is only used 
to free the dpdk buffer, so this would be clearer:

void *dpdk_buf;

> #else
>     void *base_;                 /* First byte of allocated space. */
>     void *data_;                 /* First byte actually in use. */
> @@ -174,13 +175,10 @@ static inline void *ofpbuf_get_uninit_pointer(struct 
> ofpbuf *b)
> static inline void ofpbuf_delete(struct ofpbuf *b)
> {
>     if (b) {
> -        if (b->source == OFPBUF_DPDK) {
> -            free_dpdk_buf(b);
> -            return;
> -        }
> -
>         ofpbuf_uninit(b);
> -        free(b);
> +        if (b->source != OFPBUF_DPDK) {
> +            free(b);
> +        }
>     }
> }
> 
> @@ -386,6 +384,16 @@ static inline void ofpbuf_set_size(struct ofpbuf *b, 
> uint32_t v)
>                                   * this segment. */
> }
> 
> +static inline struct ofpbuf * ofpbuf_header(const struct ofpbuf *b)
> +{
> +    return b->header_;
> +}
> +
> +static inline void ofpbuf_set_header(struct ofpbuf *b, struct ofpbuf *h)
> +{
> +    b->header_ = h;
> +}
> +

This abstraction is not necessary, if the pointer only exists for DPDK. Just 
use the pointer directly instead.

> #else
> static inline void * ofpbuf_data(const struct ofpbuf *b)
> {
> -- 
> 1.7.9.5
> 
> _______________________________________________
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev

Reply via email to