On Fri, May 23, 2014 at 11:04 AM, Daniele Di Proietto
<[email protected]> wrote:
> This change in dpif-netdev allows faster packet receiving for devices
> like netdev-dpdk which implement batch receiving.
>
> Signed-off-by: Daniele Di Proietto <[email protected]>
> ---
> This patchset allows dpif-netdev to process packets in batches.
> It boosts the performance of OVS with DPDK (forwarding a single unicast flow)
> from 1.9 Mpps to 3.3 Mpps in my tests.
>
> It is the first optimization step. There's still a lot of room for improvement
> until we hit 14.88 Mpps.
>
> Can you help review this, please, Pravin?
> ---
> lib/dpif-netdev.c | 101
> +++++++++++++++++++++++++++++-------------------------
> 1 file changed, 54 insertions(+), 47 deletions(-)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 91c83d6..d50452f 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -341,8 +341,8 @@ static void dp_netdev_execute_actions(struct dp_netdev
> *dp,
> struct pkt_metadata *,
> const struct nlattr *actions,
> size_t actions_len);
> -static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
> - struct pkt_metadata *);
> +static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf
> **packets,
> + int c, odp_port_t port_no);
>
> static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);
>
> @@ -1747,17 +1747,12 @@ dp_netdev_process_rxq_port(struct dp_netdev *dp,
> struct dp_netdev_port *port,
> struct netdev_rxq *rxq)
> {
> - struct ofpbuf *packet[NETDEV_MAX_RX_BATCH];
> + struct ofpbuf *packets[NETDEV_MAX_RX_BATCH];
> int error, c;
>
> - error = netdev_rxq_recv(rxq, packet, &c);
> + error = netdev_rxq_recv(rxq, packets, &c);
> if (!error) {
> - struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
> - int i;
> -
> - for (i = 0; i < c; i++) {
> - dp_netdev_port_input(dp, packet[i], &md);
> - }
> + dp_netdev_port_input(dp, packets, c, port->port_no);
> } else if (error != EAGAIN && error != EOPNOTSUPP) {
> static struct vlog_rate_limit rl
> = VLOG_RATE_LIMIT_INIT(1, 5);
> @@ -1954,7 +1949,7 @@ dp_netdev_flow_stats_new_cb(void)
>
> static void
> dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
> - const struct ofpbuf *packet,
> + int c, int size,
> const struct miniflow *key)
> {
> uint16_t tcp_flags = miniflow_get_tcp_flags(key);
> @@ -1966,8 +1961,8 @@ dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
>
> ovs_mutex_lock(&bucket->mutex);
> bucket->used = MAX(now, bucket->used);
> - bucket->packet_count++;
> - bucket->byte_count += ofpbuf_size(packet);
> + bucket->packet_count += c;
> + bucket->byte_count += size;
> bucket->tcp_flags |= tcp_flags;
> ovs_mutex_unlock(&bucket->mutex);
> }
> @@ -1981,61 +1976,73 @@ dp_netdev_stats_new_cb(void)
> }
>
> static void
> -dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
> +dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int
> packets)
> {
> struct dp_netdev_stats *bucket;
>
> bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
> ovs_mutex_lock(&bucket->mutex);
> - bucket->n[type]++;
> + bucket->n[type] += packets;
> ovs_mutex_unlock(&bucket->mutex);
> }
>
> static void
> -dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
> +dp_netdev_input(struct dp_netdev *dp, struct ofpbuf **packets, int c,
> struct pkt_metadata *md)
> {
> - struct dp_netdev_flow *netdev_flow;
> - struct {
> - struct miniflow flow;
> - uint32_t buf[FLOW_U32S];
> - } key;
> + int i;
>
> - if (ofpbuf_size(packet) < ETH_HEADER_LEN) {
> - ofpbuf_delete(packet);
> - return;
> - }
> - miniflow_initialize(&key.flow, key.buf);
> - miniflow_extract(packet, md, &key.flow);
> + for (i = 0; i < c; i++) {
> + struct dp_netdev_flow *netdev_flow;
> + struct {
> + struct miniflow flow;
> + uint32_t buf[FLOW_U32S];
> + } key;
>
> - netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
> - if (netdev_flow) {
> - struct dp_netdev_actions *actions;
> + if (ofpbuf_size(packets[i]) < ETH_HEADER_LEN) {
> + ofpbuf_delete(packets[i]);
> + continue;
> + }
> + miniflow_initialize(&key.flow, key.buf);
> + miniflow_extract(packets[i], &md[i], &key.flow);
> +
We just need to reset mf->map, Can you add api for that ?
infact miniflow_extract() does not read from map, is there need to
initialize it on every packet?
> + netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
>
> - dp_netdev_flow_used(netdev_flow, packet, &key.flow);
> + if (netdev_flow) {
> + struct dp_netdev_actions *actions;
> +
> + dp_netdev_flow_used(netdev_flow, 1, ofpbuf_size(packets[i]),
> &key.flow);
>
> - actions = dp_netdev_flow_get_actions(netdev_flow);
> - dp_netdev_execute_actions(dp, &key.flow, packet, true, md,
> - actions->actions, actions->size);
> - dp_netdev_count_packet(dp, DP_STAT_HIT);
> - } else if (dp->handler_queues) {
> - dp_netdev_count_packet(dp, DP_STAT_MISS);
> - dp_netdev_output_userspace(dp, packet,
> - miniflow_hash_5tuple(&key.flow, 0)
> - % dp->n_handlers,
> - DPIF_UC_MISS, &key.flow, NULL);
> - ofpbuf_delete(packet);
> + actions = dp_netdev_flow_get_actions(netdev_flow);
> + dp_netdev_execute_actions(dp, &key.flow, packets[i], true,
> &md[i],
> + actions->actions, actions->size);
> + dp_netdev_count_packet(dp, DP_STAT_HIT, 1);
> + } else if (dp->handler_queues) {
> + dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
> + dp_netdev_output_userspace(dp, packets[i],
> + miniflow_hash_5tuple(&key.flow, 0)
> + % dp->n_handlers,
> + DPIF_UC_MISS, &key.flow, NULL);
> + ofpbuf_delete(packets[i]);
> + }
> }
> }
>
> static void
> -dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
> - struct pkt_metadata *md)
> +dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
> + int c, odp_port_t port_no)
> {
> uint32_t *recirc_depth = recirc_depth_get();
> + struct pkt_metadata md[c];
> +
> + int i;
> +
> + for (i = 0; i < c; i++) {
> + md[i] = PKT_METADATA_INITIALIZER(port_no);
> + }
>
This metadata is same for given batch, we can sent just one instance of it.
> *recirc_depth = 0;
> - dp_netdev_input(dp, packet, md);
> + dp_netdev_input(dp, packets, c, md);
> }
>
> static int
> @@ -2086,7 +2093,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct
> ofpbuf *packet,
>
> error = 0;
> } else {
> - dp_netdev_count_packet(dp, DP_STAT_LOST);
> + dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
> error = ENOBUFS;
> }
> ovs_mutex_unlock(&q->mutex);
> @@ -2167,7 +2174,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
> recirc_md.recirc_id = nl_attr_get_u32(a);
>
> (*depth)++;
> - dp_netdev_input(aux->dp, recirc_packet, &recirc_md);
> + dp_netdev_input(aux->dp, &recirc_packet, 1, &recirc_md);
> (*depth)--;
>
> break;
> --
> 2.0.0.rc0
>
_______________________________________________
dev mailing list
[email protected]
http://openvswitch.org/mailman/listinfo/dev