This change allows dpif-netdev to receive and process packets in
batches, which speeds up packet reception for devices such as
netdev-dpdk that implement batched receive.
Signed-off-by: Daniele Di Proietto <ddiproie...@vmware.com>
---
This patchset allows dpif-netdev to process packets in batches. It
boosts the performance of OVS with DPDK (forwarding a single unicast
flow) from 1.9 Mpps to 3.3 Mpps in my tests.

It is the first optimization step. There's still a lot of room for
improvement before we hit 14.88 Mpps (64-byte line rate on 10 GbE).

Can you help review this, please, Pravin?
---
 lib/dpif-netdev.c | 101 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 54 insertions(+), 47 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 91c83d6..d50452f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -341,8 +341,8 @@ static void dp_netdev_execute_actions(struct dp_netdev *dp,
                                       struct pkt_metadata *,
                                       const struct nlattr *actions,
                                       size_t actions_len);
-static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                                 struct pkt_metadata *);
+static void dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
+                                 int c, odp_port_t port_no);
 
 static void dp_netdev_set_pmd_threads(struct dp_netdev *, int n);
 
@@ -1747,17 +1747,12 @@ dp_netdev_process_rxq_port(struct dp_netdev *dp,
                            struct dp_netdev_port *port,
                            struct netdev_rxq *rxq)
 {
-    struct ofpbuf *packet[NETDEV_MAX_RX_BATCH];
+    struct ofpbuf *packets[NETDEV_MAX_RX_BATCH];
     int error, c;
 
-    error = netdev_rxq_recv(rxq, packet, &c);
+    error = netdev_rxq_recv(rxq, packets, &c);
     if (!error) {
-        struct pkt_metadata md = PKT_METADATA_INITIALIZER(port->port_no);
-        int i;
-
-        for (i = 0; i < c; i++) {
-            dp_netdev_port_input(dp, packet[i], &md);
-        }
+        dp_netdev_port_input(dp, packets, c, port->port_no);
     } else if (error != EAGAIN && error != EOPNOTSUPP) {
         static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
@@ -1954,7 +1949,7 @@ dp_netdev_flow_stats_new_cb(void)
 
 static void
 dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
-                    const struct ofpbuf *packet,
+                    int c, int size,
                     const struct miniflow *key)
 {
     uint16_t tcp_flags = miniflow_get_tcp_flags(key);
@@ -1966,8 +1961,8 @@ dp_netdev_flow_used(struct dp_netdev_flow *netdev_flow,
 
     ovs_mutex_lock(&bucket->mutex);
     bucket->used = MAX(now, bucket->used);
-    bucket->packet_count++;
-    bucket->byte_count += ofpbuf_size(packet);
+    bucket->packet_count += c;
+    bucket->byte_count += size;
     bucket->tcp_flags |= tcp_flags;
     ovs_mutex_unlock(&bucket->mutex);
 }
@@ -1981,61 +1976,73 @@ dp_netdev_stats_new_cb(void)
 }
 
 static void
-dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type)
+dp_netdev_count_packet(struct dp_netdev *dp, enum dp_stat_type type, int packets)
 {
     struct dp_netdev_stats *bucket;
 
     bucket = ovsthread_stats_bucket_get(&dp->stats, dp_netdev_stats_new_cb);
     ovs_mutex_lock(&bucket->mutex);
-    bucket->n[type]++;
+    bucket->n[type] += packets;
     ovs_mutex_unlock(&bucket->mutex);
 }
 
 static void
-dp_netdev_input(struct dp_netdev *dp, struct ofpbuf *packet,
+dp_netdev_input(struct dp_netdev *dp, struct ofpbuf **packets, int c,
                 struct pkt_metadata *md)
 {
-    struct dp_netdev_flow *netdev_flow;
-    struct {
-        struct miniflow flow;
-        uint32_t buf[FLOW_U32S];
-    } key;
+    int i;
 
-    if (ofpbuf_size(packet) < ETH_HEADER_LEN) {
-        ofpbuf_delete(packet);
-        return;
-    }
-    miniflow_initialize(&key.flow, key.buf);
-    miniflow_extract(packet, md, &key.flow);
+    for (i = 0; i < c; i++) {
+        struct dp_netdev_flow *netdev_flow;
+        struct {
+            struct miniflow flow;
+            uint32_t buf[FLOW_U32S];
+        } key;
 
-    netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
-    if (netdev_flow) {
-        struct dp_netdev_actions *actions;
+        if (ofpbuf_size(packets[i]) < ETH_HEADER_LEN) {
+            ofpbuf_delete(packets[i]);
+            continue;
+        }
+        miniflow_initialize(&key.flow, key.buf);
+        miniflow_extract(packets[i], &md[i], &key.flow);
+
+        netdev_flow = dp_netdev_lookup_flow(dp, &key.flow);
 
-        dp_netdev_flow_used(netdev_flow, packet, &key.flow);
+        if (netdev_flow) {
+            struct dp_netdev_actions *actions;
+
+            dp_netdev_flow_used(netdev_flow, 1, ofpbuf_size(packets[i]), &key.flow);
 
-        actions = dp_netdev_flow_get_actions(netdev_flow);
-        dp_netdev_execute_actions(dp, &key.flow, packet, true, md,
-                                  actions->actions, actions->size);
-        dp_netdev_count_packet(dp, DP_STAT_HIT);
-    } else if (dp->handler_queues) {
-        dp_netdev_count_packet(dp, DP_STAT_MISS);
-        dp_netdev_output_userspace(dp, packet,
-                                   miniflow_hash_5tuple(&key.flow, 0)
-                                   % dp->n_handlers,
-                                   DPIF_UC_MISS, &key.flow, NULL);
-        ofpbuf_delete(packet);
+            actions = dp_netdev_flow_get_actions(netdev_flow);
+            dp_netdev_execute_actions(dp, &key.flow, packets[i], true, &md[i],
+                                      actions->actions, actions->size);
+            dp_netdev_count_packet(dp, DP_STAT_HIT, 1);
+        } else if (dp->handler_queues) {
+            dp_netdev_count_packet(dp, DP_STAT_MISS, 1);
+            dp_netdev_output_userspace(dp, packets[i],
+                                       miniflow_hash_5tuple(&key.flow, 0)
+                                       % dp->n_handlers,
+                                       DPIF_UC_MISS, &key.flow, NULL);
+            ofpbuf_delete(packets[i]);
+        }
     }
 }
 
 static void
-dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf *packet,
-                     struct pkt_metadata *md)
+dp_netdev_port_input(struct dp_netdev *dp, struct ofpbuf **packets,
+                     int c, odp_port_t port_no)
 {
     uint32_t *recirc_depth = recirc_depth_get();
+    struct pkt_metadata md[c];
+
+    int i;
+
+    for (i = 0; i < c; i++) {
+        md[i] = PKT_METADATA_INITIALIZER(port_no);
+    }
 
     *recirc_depth = 0;
-    dp_netdev_input(dp, packet, md);
+    dp_netdev_input(dp, packets, c, md);
 }
 
 static int
@@ -2086,7 +2093,7 @@ dp_netdev_output_userspace(struct dp_netdev *dp, struct ofpbuf *packet,
 
         error = 0;
     } else {
-        dp_netdev_count_packet(dp, DP_STAT_LOST);
+        dp_netdev_count_packet(dp, DP_STAT_LOST, 1);
         error = ENOBUFS;
     }
     ovs_mutex_unlock(&q->mutex);
@@ -2167,7 +2174,7 @@ dp_execute_cb(void *aux_, struct ofpbuf *packet,
         recirc_md.recirc_id = nl_attr_get_u32(a);
 
         (*depth)++;
-        dp_netdev_input(aux->dp, recirc_packet, &recirc_md);
+        dp_netdev_input(aux->dp, &recirc_packet, 1, &recirc_md);
         (*depth)--;
         break;
 
-- 
2.0.0.rc0
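
P.S. For anyone skimming the cover letter rather than the diff: the point
of the rework is to hoist the per-packet call chain out of the receive
path, so that fixed costs (function calls, pkt_metadata initialization,
stats bucket locking) are paid once per batch instead of once per packet.
Below is a minimal, self-contained sketch of that shape. The names here
(toy_recv_batch(), toy_process_batch(), MAX_BATCH, struct pkt) are made
up for illustration only; they are not part of this patch or of OVS.

    #include <stddef.h>
    #include <stdio.h>

    #define MAX_BATCH 32            /* plays the role of NETDEV_MAX_RX_BATCH */

    struct pkt {
        size_t size;                /* stand-in for an ofpbuf */
    };

    /* Pretend receive: fills 'pkts' with up to 'max' packets and returns
     * the count, the way netdev_rxq_recv() fills an array and its 'c'
     * out-parameter. */
    static int
    toy_recv_batch(struct pkt *pkts[], int max)
    {
        static struct pkt storage[MAX_BATCH];
        int i;

        for (i = 0; i < max; i++) {
            storage[i].size = 64;
            pkts[i] = &storage[i];
        }
        return max;
    }

    /* Batched consumer: shared setup (here just a byte counter; in the
     * patch, metadata initialization and stats updates) happens once per
     * batch, and only the true per-packet work stays inside the loop. */
    static void
    toy_process_batch(struct pkt *pkts[], int c)
    {
        size_t bytes = 0;
        int i;

        for (i = 0; i < c; i++) {
            bytes += pkts[i]->size;
        }
        printf("processed %d packets, %zu bytes\n", c, bytes);
    }

    int
    main(void)
    {
        struct pkt *pkts[MAX_BATCH];
        int c = toy_recv_batch(pkts, MAX_BATCH);

        toy_process_batch(pkts, c);  /* one call per batch, not per packet */
        return 0;
    }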